@deepagents/text2sql 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -685,6 +685,14 @@ var BLOCKED_DB_CLIENT_COMMANDS = /* @__PURE__ */ new Set([
685
685
  ]);
686
686
  var BLOCKED_RAW_SQL_COMMANDS = /* @__PURE__ */ new Set(["select", "with"]);
687
687
  var ALLOWED_SQL_PROXY_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "validate"]);
688
// Shell interpreters: their -c payloads, here-documents, and piped stdin are
// inspected as nested scripts by the blocked-command check.
var SHELL_INTERPRETER_COMMANDS = /* @__PURE__ */ new Set(
  ["bash", "sh", "zsh", "dash", "ksh"]
);
// Commands whose arguments are themselves a command (or script text) to run.
var WRAPPER_COMMANDS = /* @__PURE__ */ new Set(
  ["env", "command", "eval"]
);
688
696
  var SQL_PROXY_ENFORCEMENT_MESSAGE = [
689
697
  "Direct database querying through bash is blocked.",
690
698
  "Use SQL proxy commands in this order:",
@@ -740,82 +748,94 @@ function isScriptNode(value) {
740
748
  const node = value;
741
749
  return node.type === "Script" && Array.isArray(node.statements);
742
750
  }
743
- function scriptContainsBlockedCommand(script, context) {
744
- return statementsContainBlockedCommand(script.statements, context);
751
+ function scriptContainsBlockedCommand(script, context, mode = "blocked-only") {
752
+ return statementsContainBlockedCommand(script.statements, context, mode);
745
753
  }
746
- function statementsContainBlockedCommand(statements, context) {
754
+ function statementsContainBlockedCommand(statements, context, mode) {
747
755
  for (const statement of statements) {
748
- if (statementContainsBlockedCommand(statement, context)) {
756
+ if (statementContainsBlockedCommand(statement, context, mode)) {
749
757
  return true;
750
758
  }
751
759
  }
752
760
  return false;
753
761
  }
754
- function statementContainsBlockedCommand(statement, context) {
762
+ function statementContainsBlockedCommand(statement, context, mode) {
755
763
  for (const pipeline of statement.pipelines) {
756
- if (pipelineContainsBlockedCommand(pipeline, context)) {
764
+ if (pipelineContainsBlockedCommand(pipeline, context, mode)) {
757
765
  return true;
758
766
  }
759
767
  }
760
768
  return false;
761
769
  }
762
- function pipelineContainsBlockedCommand(pipeline, context) {
763
- for (const command of pipeline.commands) {
770
+ function pipelineContainsBlockedCommand(pipeline, context, mode) {
771
+ for (const [index2, command] of pipeline.commands.entries()) {
764
772
  if (command.type === "FunctionDef") {
765
773
  context.functionDefinitions.set(command.name, command);
766
774
  continue;
767
775
  }
768
- if (commandContainsBlockedCommand(command, context)) {
776
+ if (commandContainsBlockedCommand(command, context, mode, {
777
+ stdinFromPipe: index2 > 0
778
+ })) {
769
779
  return true;
770
780
  }
771
781
  }
772
782
  return false;
773
783
  }
774
- function stringCommandContainsBlockedCommand(command, context) {
784
+ function stringCommandContainsBlockedCommand(command, context, mode = "blocked-only") {
775
785
  let script;
776
786
  try {
777
787
  script = parse(command);
778
788
  } catch {
779
789
  return false;
780
790
  }
781
- return scriptContainsBlockedCommand(script, cloneInspectionContext(context));
791
+ return scriptContainsBlockedCommand(
792
+ script,
793
+ cloneInspectionContext(context),
794
+ mode
795
+ );
782
796
  }
783
- function wordContainsBlockedCommand(word, context) {
797
+ function wordContainsBlockedCommand(word, context, mode) {
784
798
  if (!word) {
785
799
  return false;
786
800
  }
787
801
  return wordPartContainsBlockedCommand(
788
802
  word.parts,
789
- context
803
+ context,
804
+ mode
790
805
  );
791
806
  }
792
- function wordPartContainsBlockedCommand(parts, context) {
807
+ function wordPartContainsBlockedCommand(parts, context, mode) {
793
808
  for (const part of parts) {
794
- if (partContainsBlockedCommand(part, context)) {
809
+ if (partContainsBlockedCommand(part, context, mode)) {
795
810
  return true;
796
811
  }
797
812
  }
798
813
  return false;
799
814
  }
800
- function partContainsBlockedCommand(node, context) {
815
+ function partContainsBlockedCommand(node, context, mode) {
801
816
  const type = node.type;
802
817
  if (type === "CommandSubstitution" || type === "ProcessSubstitution") {
803
818
  if (isScriptNode(node.body)) {
804
819
  return scriptContainsBlockedCommand(
805
820
  node.body,
806
- cloneInspectionContext(context)
821
+ cloneInspectionContext(context),
822
+ mode
807
823
  );
808
824
  }
809
825
  return false;
810
826
  }
811
827
  if (type === "ArithCommandSubst" && typeof node.command === "string") {
812
- return stringCommandContainsBlockedCommand(node.command, context);
828
+ return stringCommandContainsBlockedCommand(node.command, context, mode);
813
829
  }
814
830
  for (const value of Object.values(node)) {
815
831
  if (Array.isArray(value)) {
816
832
  for (const item of value) {
817
833
  if (typeof item === "object" && item !== null) {
818
- if (partContainsBlockedCommand(item, context)) {
834
+ if (partContainsBlockedCommand(
835
+ item,
836
+ context,
837
+ mode
838
+ )) {
819
839
  return true;
820
840
  }
821
841
  }
@@ -823,14 +843,18 @@ function partContainsBlockedCommand(node, context) {
823
843
  continue;
824
844
  }
825
845
  if (typeof value === "object" && value !== null) {
826
- if (partContainsBlockedCommand(value, context)) {
846
+ if (partContainsBlockedCommand(
847
+ value,
848
+ context,
849
+ mode
850
+ )) {
827
851
  return true;
828
852
  }
829
853
  }
830
854
  }
831
855
  return false;
832
856
  }
833
- function functionInvocationContainsBlockedCommand(functionName, context) {
857
+ function functionInvocationContainsBlockedCommand(functionName, context, mode) {
834
858
  const definition = context.functionDefinitions.get(functionName);
835
859
  if (!definition) {
836
860
  return false;
@@ -840,52 +864,306 @@ function functionInvocationContainsBlockedCommand(functionName, context) {
840
864
  }
841
865
  const invocationContext = cloneInspectionContext(context);
842
866
  invocationContext.callStack.add(functionName);
843
- return commandContainsBlockedCommand(definition.body, invocationContext);
867
+ return commandContainsBlockedCommand(
868
+ definition.body,
869
+ invocationContext,
870
+ mode,
871
+ { stdinFromPipe: false }
872
+ );
873
+ }
874
/**
 * Reports whether the first UTF-16 code unit of `character` is an ASCII
 * letter (A-Z or a-z). An empty string yields `false`.
 *
 * @param {string} character
 * @returns {boolean}
 */
function isAsciiLetter(character) {
  const code = character.charCodeAt(0);
  const isUpperCase = code >= 0x41 && code <= 0x5a;
  const isLowerCase = code >= 0x61 && code <= 0x7a;
  return isUpperCase || isLowerCase;
}
878
/**
 * Reports whether the first UTF-16 code unit of `character` is an ASCII
 * digit (0-9). An empty string yields `false`.
 *
 * @param {string} character
 * @returns {boolean}
 */
function isAsciiDigit(character) {
  const code = character.charCodeAt(0);
  if (code < 0x30) {
    return false;
  }
  return code <= 0x39;
}
882
/**
 * Checks that `name` is shaped like an environment variable identifier:
 * a leading ASCII letter or underscore followed only by ASCII letters,
 * ASCII digits, or underscores. Empty/falsy names are rejected.
 *
 * @param {string} name
 * @returns {boolean}
 */
function isValidEnvVariableName(name) {
  if (!name) {
    return false;
  }
  const isLeadingChar = (candidate) => isAsciiLetter(candidate) || candidate === "_";
  const isTrailingChar = (candidate) => isAsciiLetter(candidate) || isAsciiDigit(candidate) || candidate === "_";
  if (!isLeadingChar(name[0])) {
    return false;
  }
  let position = 1;
  while (position < name.length) {
    if (!isTrailingChar(name[position])) {
      return false;
    }
    position += 1;
  }
  return true;
}
898
/**
 * Reports whether `token` has the form NAME=value, where NAME (the text
 * before the first "=") is a valid environment variable name.
 *
 * @param {string} token
 * @returns {boolean}
 */
function isEnvAssignmentToken(token) {
  const equalsAt = token.indexOf("=");
  // A missing "=" (-1) or a leading "=" (0) leaves no name to validate.
  return equalsAt > 0 && isValidEnvVariableName(token.slice(0, equalsAt));
}
905
/**
 * Parses a single-dash option cluster such as "-ec" or "-s".
 *
 * The cluster is valid only when it starts with exactly one "-", has at
 * least one character after it, and every following character is an ASCII
 * letter. For a valid cluster the result records whether it contains "c"
 * (command flag), "s" (stdin flag), or "o"/"O" (an option that takes the
 * next argument as its value). Invalid clusters report every flag as false.
 *
 * @param {string} option
 * @returns {{valid: boolean, hasCommandFlag: boolean, hasStdinFlag: boolean, consumesNextArg: boolean}}
 */
function parseShortOptionCluster(option) {
  const rejected = () => ({
    valid: false,
    hasCommandFlag: false,
    hasStdinFlag: false,
    consumesNextArg: false
  });
  const isSingleDashCluster = option.startsWith("-") && !option.startsWith("--") && option.length > 1;
  if (!isSingleDashCluster) {
    return rejected();
  }
  const result = {
    valid: true,
    hasCommandFlag: false,
    hasStdinFlag: false,
    consumesNextArg: false
  };
  for (const flag of option.slice(1).split("")) {
    if (!isAsciiLetter(flag)) {
      return rejected();
    }
    switch (flag) {
      case "c":
        result.hasCommandFlag = true;
        break;
      case "s":
        result.hasStdinFlag = true;
        break;
      case "O":
      case "o":
        result.consumesNextArg = true;
        break;
      default:
        break;
    }
  }
  return result;
}
937
/**
 * Classifies how a shell interpreter invocation receives the code it runs,
 * based on the argument words that follow the interpreter name.
 *
 * Result kinds:
 * - "unknown": an argument is not static text, an option cluster is malformed,
 *   or a value-taking option has no value (callers treat this as blocked).
 * - "command": a command string was requested via `-c` / `--command`;
 *   `payload` is that string, or null/undefined when it is missing or dynamic.
 * - "script":  the first non-option argument is a script operand (`payload`).
 * - "stdin":   `-s` was given and no operand follows.
 * - "none":    nothing indicates what will be executed.
 *
 * A `-c` flag does not bind to the argument immediately after it: the command
 * string is the first non-option argument once all options are consumed, so
 * `-c -e 'cmd'` and `-co pipefail 'cmd'` both execute `cmd`. The scan
 * therefore keeps consuming options after seeing `-c` and only then picks
 * the payload.
 *
 * @param {Array} args - argument words of the shell invocation
 * @returns {{kind: string, payload: (string|null|undefined)}}
 */
function getShellInvocationDescriptor(args) {
  let readsFromStdin = false;
  // Set once a `c` flag is seen; the next operand is then a command string.
  let commandStringPending = false;
  const longOptionsWithValue = /* @__PURE__ */ new Set(["--rcfile", "--init-file"]);
  for (let index2 = 0; index2 < args.length; index2 += 1) {
    const token = asStaticWordText(args[index2]);
    if (token == null) {
      return { kind: "unknown", payload: null };
    }
    if (token === "--") {
      if (index2 + 1 >= args.length) {
        break;
      }
      // End of options: the next word is the operand, whatever it looks like.
      return {
        kind: commandStringPending ? "command" : "script",
        payload: asStaticWordText(args[index2 + 1])
      };
    }
    if (token === "--command") {
      return {
        kind: "command",
        payload: asStaticWordText(args[index2 + 1])
      };
    }
    if (token.startsWith("--command=")) {
      return {
        kind: "command",
        payload: token.slice("--command=".length)
      };
    }
    if (token.startsWith("--")) {
      if (token.includes("=")) {
        continue;
      }
      if (longOptionsWithValue.has(token)) {
        if (index2 + 1 >= args.length) {
          return { kind: "unknown", payload: null };
        }
        // Skip the option's value.
        index2 += 1;
      }
      continue;
    }
    if (token.startsWith("-")) {
      const parsed = parseShortOptionCluster(token);
      if (!parsed.valid) {
        return { kind: "unknown", payload: null };
      }
      if (parsed.hasCommandFlag) {
        commandStringPending = true;
      }
      if (parsed.hasStdinFlag) {
        readsFromStdin = true;
      }
      if (parsed.consumesNextArg) {
        if (index2 + 1 >= args.length) {
          return { kind: "unknown", payload: null };
        }
        // Skip the option's value (e.g. the name after `-o`).
        index2 += 1;
      }
      continue;
    }
    // First non-option word: command string if `-c` was seen, else a script.
    return {
      kind: commandStringPending ? "command" : "script",
      payload: token
    };
  }
  if (commandStringPending) {
    // `-c` without a command string; callers block on the empty payload.
    return { kind: "command", payload: null };
  }
  if (readsFromStdin) {
    return { kind: "stdin", payload: null };
  }
  return { kind: "none", payload: null };
}
1010
/**
 * Collects the static text of every here-document among `redirections`.
 *
 * @param {Array} redirections - redirection nodes; only those whose
 *   `target.type` is "HereDoc" are considered
 * @returns {{hasHereDoc: boolean, payload: (string|null)}}
 *   `hasHereDoc` is false when no here-document is present. When one is
 *   present, `payload` is the newline-joined text of all of them (a
 *   here-document without content contributes an empty string), or null as
 *   soon as one here-document's content is not static text.
 */
function getHereDocPayload(redirections) {
  const collected = [];
  for (const { target } of redirections) {
    if (target.type === "HereDoc") {
      let text = "";
      if (target.content) {
        text = asStaticWordText(target.content);
        if (text == null) {
          // Content that cannot be resolved statically cannot be inspected.
          return { hasHereDoc: true, payload: null };
        }
      }
      collected.push(text);
    }
  }
  return collected.length > 0
    ? { hasHereDoc: true, payload: collected.join("\n") }
    : { hasHereDoc: false, payload: null };
}
1031
/**
 * Joins the static text of `words` with single spaces.
 *
 * @param {Iterable} words
 * @returns {(string|null)} the joined text, or null as soon as one word has
 *   no static text
 */
function joinStaticWords(words) {
  const pieces = [];
  for (const current of words) {
    const text = asStaticWordText(current);
    if (text != null) {
      pieces.push(text);
      continue;
    }
    return null;
  }
  return pieces.join(" ");
}
1042
/**
 * Finds the command that an `env ...` invocation will execute, skipping
 * env's own options and NAME=value assignments.
 *
 * @param {Array} args - argument words following `env`
 * @returns {{kind: "unknown"} | {kind: "none"} | {kind: "resolved", name: *, args: Array}}
 *   - "unknown": an argument is not static text, a value-taking option lacks
 *     its value, or a split-string option is used. The split-string payload
 *     is one string that env itself re-splits into a command line, so it
 *     cannot be resolved here.
 *   - "none": no command follows the options/assignments.
 *   - "resolved": `name` is the command word and `args` the words after it.
 */
function resolveEnvWrapperCommand(args) {
  let index2 = 0;
  while (index2 < args.length) {
    const token = asStaticWordText(args[index2]);
    if (token == null) {
      return { kind: "unknown" };
    }
    if (token === "--") {
      index2 += 1;
      break;
    }
    if (token.startsWith("--")) {
      const separatorIndex = token.indexOf("=");
      const optionName = separatorIndex === -1 ? token.slice(2) : token.slice(2, separatorIndex);
      // Long options may be abbreviated to an unambiguous prefix, so match by
      // prefix rather than by the full spelling.
      if ("split-string".startsWith(optionName)) {
        return { kind: "unknown" };
      }
      const takesValue = "unset".startsWith(optionName) || "chdir".startsWith(optionName);
      if (takesValue && separatorIndex === -1) {
        if (index2 + 1 >= args.length) {
          return { kind: "unknown" };
        }
        // The value is the following word; skip it together with the option.
        index2 += 1;
      }
      index2 += 1;
      continue;
    }
    if (token.startsWith("-")) {
      // A lone "-" has no flag characters and is skipped like any option
      // (env treats it as -i), never taken as the command name.
      let valueInNextArg = false;
      for (let position = 1; position < token.length; position += 1) {
        const flag = token[position];
        if (flag === "S") {
          return { kind: "unknown" };
        }
        if (flag === "u" || flag === "C") {
          // The rest of the cluster is the value; if nothing is left, the
          // value is the next word.
          valueInNextArg = position === token.length - 1;
          break;
        }
      }
      if (valueInNextArg) {
        if (index2 + 1 >= args.length) {
          return { kind: "unknown" };
        }
        index2 += 1;
      }
      index2 += 1;
      continue;
    }
    if (isEnvAssignmentToken(token)) {
      index2 += 1;
      continue;
    }
    break;
  }
  if (index2 >= args.length) {
    return { kind: "none" };
  }
  return {
    kind: "resolved",
    name: args[index2],
    args: args.slice(index2 + 1)
  };
}
845
- function commandContainsBlockedCommand(command, context) {
1083
/**
 * Finds the command that a `command ...` builtin invocation will execute.
 *
 * Leading words that start with "-" (other than a lone "-") are treated as
 * options and skipped; "--" ends option parsing. `-v` and `-V` mark the
 * invocation as a lookup, which executes nothing.
 *
 * @param {Array} args - argument words following `command`
 * @returns {{kind: "unknown"} | {kind: "none"} | {kind: "resolved", name: *, args: Array}}
 *   "unknown" when an inspected word is not static text; "none" for lookups
 *   or when no command word remains; otherwise the command word and the
 *   words after it.
 */
function resolveCommandWrapperCommand(args) {
  let cursor = 0;
  let isLookup = false;
  for (; cursor < args.length; cursor += 1) {
    const text = asStaticWordText(args[cursor]);
    if (text == null) {
      return { kind: "unknown" };
    }
    if (text === "--") {
      // Step past the terminator; whatever follows is the command word.
      cursor += 1;
      break;
    }
    const isOption = text !== "-" && text.startsWith("-");
    if (!isOption) {
      break;
    }
    if (text === "-v" || text === "-V") {
      isLookup = true;
    }
  }
  if (isLookup || cursor >= args.length) {
    return { kind: "none" };
  }
  return {
    kind: "resolved",
    name: args[cursor],
    args: args.slice(cursor + 1)
  };
}
1115
+ function commandContainsBlockedCommand(command, context, mode, options = { stdinFromPipe: false }) {
846
1116
  switch (command.type) {
847
1117
  case "SimpleCommand":
848
- return isBlockedSimpleCommand(command, context);
1118
+ return isBlockedSimpleCommand(command, context, mode, options);
849
1119
  case "If":
850
1120
  return command.clauses.some(
851
1121
  (clause) => statementsContainBlockedCommand(
852
1122
  clause.condition,
853
- cloneInspectionContext(context)
1123
+ cloneInspectionContext(context),
1124
+ mode
854
1125
  ) || statementsContainBlockedCommand(
855
1126
  clause.body,
856
- cloneInspectionContext(context)
1127
+ cloneInspectionContext(context),
1128
+ mode
857
1129
  )
858
1130
  ) || (command.elseBody ? statementsContainBlockedCommand(
859
1131
  command.elseBody,
860
- cloneInspectionContext(context)
1132
+ cloneInspectionContext(context),
1133
+ mode
861
1134
  ) : false);
862
1135
  case "For":
863
1136
  case "CStyleFor":
864
1137
  return statementsContainBlockedCommand(
865
1138
  command.body,
866
- cloneInspectionContext(context)
1139
+ cloneInspectionContext(context),
1140
+ mode
867
1141
  );
868
1142
  case "While":
869
1143
  case "Until":
870
1144
  return statementsContainBlockedCommand(
871
1145
  command.condition,
872
- cloneInspectionContext(context)
1146
+ cloneInspectionContext(context),
1147
+ mode
873
1148
  ) || statementsContainBlockedCommand(
874
1149
  command.body,
875
- cloneInspectionContext(context)
1150
+ cloneInspectionContext(context),
1151
+ mode
876
1152
  );
877
1153
  case "Case":
878
1154
  return command.items.some(
879
1155
  (item) => statementsContainBlockedCommand(
880
1156
  item.body,
881
- cloneInspectionContext(context)
1157
+ cloneInspectionContext(context),
1158
+ mode
882
1159
  )
883
1160
  );
884
1161
  case "Subshell":
885
1162
  case "Group":
886
1163
  return statementsContainBlockedCommand(
887
1164
  command.body,
888
- cloneInspectionContext(context)
1165
+ cloneInspectionContext(context),
1166
+ mode
889
1167
  );
890
1168
  case "FunctionDef":
891
1169
  return false;
@@ -898,16 +1176,16 @@ function commandContainsBlockedCommand(command, context) {
898
1176
  }
899
1177
  }
900
1178
  }
901
- function isBlockedSimpleCommand(command, context) {
902
- if (wordContainsBlockedCommand(command.name, context)) {
1179
+ function isBlockedSimpleCommand(command, context, mode, options) {
1180
+ if (wordContainsBlockedCommand(command.name, context, mode)) {
903
1181
  return true;
904
1182
  }
905
- if (command.args.some((arg) => wordContainsBlockedCommand(arg, context))) {
1183
+ if (command.args.some((arg) => wordContainsBlockedCommand(arg, context, mode))) {
906
1184
  return true;
907
1185
  }
908
1186
  if (command.assignments.some(
909
- (assignment) => wordContainsBlockedCommand(assignment.value, context) || (assignment.array?.some(
910
- (value) => wordContainsBlockedCommand(value, context)
1187
+ (assignment) => wordContainsBlockedCommand(assignment.value, context, mode) || (assignment.array?.some(
1188
+ (value) => wordContainsBlockedCommand(value, context, mode)
911
1189
  ) ?? false)
912
1190
  )) {
913
1191
  return true;
@@ -916,11 +1194,16 @@ function isBlockedSimpleCommand(command, context) {
916
1194
  if (redirection.target.type === "Word") {
917
1195
  return wordContainsBlockedCommand(
918
1196
  redirection.target,
919
- context
1197
+ context,
1198
+ mode
920
1199
  );
921
1200
  }
922
1201
  if (redirection.target.type === "HereDoc" && redirection.target.content) {
923
- return wordContainsBlockedCommand(redirection.target.content, context);
1202
+ return wordContainsBlockedCommand(
1203
+ redirection.target.content,
1204
+ context,
1205
+ mode
1206
+ );
924
1207
  }
925
1208
  return false;
926
1209
  })) {
@@ -939,9 +1222,92 @@ function isBlockedSimpleCommand(command, context) {
939
1222
  }
940
1223
  if (normalizedName === "sql") {
941
1224
  const subcommand = asStaticWordText(command.args[0])?.toLowerCase();
942
- return !subcommand || !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
1225
+ if (!subcommand) {
1226
+ return true;
1227
+ }
1228
+ if (mode === "block-all-sql") {
1229
+ return true;
1230
+ }
1231
+ return !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
943
1232
  }
944
- if (functionInvocationContainsBlockedCommand(commandName, context)) {
1233
+ const inspectWrappedCommand = (resolved) => {
1234
+ if (resolved.kind === "none") {
1235
+ return false;
1236
+ }
1237
+ if (resolved.kind === "unknown" || !resolved.name || !resolved.args) {
1238
+ return true;
1239
+ }
1240
+ return isBlockedSimpleCommand(
1241
+ {
1242
+ name: resolved.name,
1243
+ args: resolved.args,
1244
+ assignments: [],
1245
+ redirections: []
1246
+ },
1247
+ context,
1248
+ "block-all-sql",
1249
+ options
1250
+ );
1251
+ };
1252
+ if (WRAPPER_COMMANDS.has(normalizedName)) {
1253
+ if (normalizedName === "env") {
1254
+ return inspectWrappedCommand(resolveEnvWrapperCommand(command.args));
1255
+ }
1256
+ if (normalizedName === "command") {
1257
+ return inspectWrappedCommand(resolveCommandWrapperCommand(command.args));
1258
+ }
1259
+ const evalScript = joinStaticWords(command.args);
1260
+ if (evalScript == null) {
1261
+ return true;
1262
+ }
1263
+ if (!evalScript.trim()) {
1264
+ return false;
1265
+ }
1266
+ return stringCommandContainsBlockedCommand(
1267
+ evalScript,
1268
+ context,
1269
+ "block-all-sql"
1270
+ );
1271
+ }
1272
+ if (SHELL_INTERPRETER_COMMANDS.has(normalizedName)) {
1273
+ const shellInvocation = getShellInvocationDescriptor(command.args);
1274
+ if (shellInvocation.kind === "unknown") {
1275
+ return true;
1276
+ }
1277
+ if (shellInvocation.kind === "command") {
1278
+ if (!shellInvocation.payload) {
1279
+ return true;
1280
+ }
1281
+ if (stringCommandContainsBlockedCommand(
1282
+ shellInvocation.payload,
1283
+ context,
1284
+ "block-all-sql"
1285
+ )) {
1286
+ return true;
1287
+ }
1288
+ return false;
1289
+ }
1290
+ const hereDoc = getHereDocPayload(command.redirections);
1291
+ if (hereDoc.hasHereDoc) {
1292
+ if (hereDoc.payload == null) {
1293
+ return true;
1294
+ }
1295
+ if (hereDoc.payload.trim().length > 0 && stringCommandContainsBlockedCommand(
1296
+ hereDoc.payload,
1297
+ context,
1298
+ "block-all-sql"
1299
+ )) {
1300
+ return true;
1301
+ }
1302
+ }
1303
+ if (shellInvocation.kind === "script") {
1304
+ return true;
1305
+ }
1306
+ if (options.stdinFromPipe || shellInvocation.kind === "stdin") {
1307
+ return !hereDoc.hasHereDoc;
1308
+ }
1309
+ }
1310
+ if (functionInvocationContainsBlockedCommand(commandName, context, mode)) {
945
1311
  return true;
946
1312
  }
947
1313
  return false;
@@ -1073,13 +1439,17 @@ import "@deepagents/agent";
1073
1439
  import {
1074
1440
  ContextEngine as ContextEngine2,
1075
1441
  InMemoryContextStore as InMemoryContextStore2,
1442
+ example,
1076
1443
  fragment as fragment2,
1444
+ guardrail,
1445
+ hint as hint2,
1077
1446
  persona as persona3,
1078
1447
  policy,
1079
1448
  structuredOutput as structuredOutput2,
1080
- user as user2
1449
+ user as user2,
1450
+ workflow
1081
1451
  } from "@deepagents/context";
1082
- var RETRY_TEMPERATURES = [0, 0.2, 0.3];
1452
+ var RETRY_TEMPERATURES = [0, 0.4, 0.8];
1083
1453
  var SQL_AGENT_ROLE = "Expert SQL query generator.";
1084
1454
  var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
1085
1455
  var SQL_AGENT_POLICIES = [
@@ -1087,94 +1457,201 @@ var SQL_AGENT_POLICIES = [
1087
1457
  "schema_mapping",
1088
1458
  policy({
1089
1459
  rule: "Translate natural language into precise SQL grounded in available schema entities."
1460
+ }),
1461
+ hint2("Preserve schema spelling exactly, including typos in column names.")
1462
+ ),
1463
+ fragment2(
1464
+ "projection_minimality",
1465
+ policy({
1466
+ rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
1467
+ }),
1468
+ policy({
1469
+ rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
1470
+ }),
1471
+ policy({
1472
+ rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
1473
+ }),
1474
+ hint2(
1475
+ "Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
1476
+ ),
1477
+ policy({
1478
+ rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
1479
+ }),
1480
+ hint2(
1481
+ 'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
1482
+ )
1483
+ ),
1484
+ fragment2(
1485
+ "date_transform_safety",
1486
+ policy({
1487
+ rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
1488
+ }),
1489
+ policy({
1490
+ rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
1491
+ }),
1492
+ hint2(
1493
+ "Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
1494
+ ),
1495
+ hint2(
1496
+ "If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
1497
+ )
1498
+ ),
1499
+ fragment2(
1500
+ "sql_minimality",
1501
+ guardrail({
1502
+ rule: "Never hallucinate tables or columns.",
1503
+ reason: "Schema fidelity is required.",
1504
+ action: "Use only available schema entities."
1505
+ }),
1506
+ guardrail({
1507
+ rule: "Avoid unnecessary transformations and derived projections.",
1508
+ reason: "Extra transformations frequently change semantics and reduce correctness.",
1509
+ action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
1510
+ })
1511
+ ),
1512
+ fragment2(
1513
+ "preflight_checklist",
1514
+ workflow({
1515
+ task: "Final SQL preflight before returning output",
1516
+ steps: [
1517
+ "Verify selected columns match the question and remove unrequested helper projections.",
1518
+ "If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
1519
+ "Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
1520
+ "If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
1521
+ "Return only schema-grounded SQL using existing tables and columns."
1522
+ ]
1523
+ })
1524
+ ),
1525
+ fragment2(
1526
+ "set_semantics",
1527
+ policy({
1528
+ rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
1529
+ }),
1530
+ policy({
1531
+ rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
1532
+ }),
1533
+ hint2(
1534
+ "Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
1535
+ ),
1536
+ hint2(
1537
+ "When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
1538
+ ),
1539
+ hint2(
1540
+ "Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
1541
+ ),
1542
+ policy({
1543
+ rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
1544
+ })
1545
+ ),
1546
+ fragment2(
1547
+ "predicate_column_alignment",
1548
+ policy({
1549
+ rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
1550
+ }),
1551
+ hint2(
1552
+ "When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
1553
+ ),
1554
+ hint2(
1555
+ "When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
1556
+ ),
1557
+ policy({
1558
+ rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
1559
+ }),
1560
+ policy({
1561
+ rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
1562
+ }),
1563
+ hint2(
1564
+ 'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
1565
+ ),
1566
+ policy({
1567
+ rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
1568
+ }),
1569
+ policy({
1570
+ rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
1571
+ }),
1572
+ policy({
1573
+ rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
1574
+ })
1575
+ ),
1576
+ fragment2(
1577
+ "ordering_semantics",
1578
+ policy({
1579
+ rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
1580
+ }),
1581
+ policy({
1582
+ rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
1583
+ }),
1584
+ policy({
1585
+ rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
1586
+ }),
1587
+ policy({
1588
+ rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
1589
+ }),
1590
+ hint2(
1591
+ 'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
1592
+ )
1593
+ ),
1594
+ fragment2(
1595
+ "negative_membership_queries",
1596
+ policy({
1597
+ rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
1598
+ }),
1599
+ hint2(
1600
+ "Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
1601
+ ),
1602
+ hint2(
1603
+ 'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
1604
+ )
1605
+ ),
1606
+ fragment2(
1607
+ "join_completeness",
1608
+ policy({
1609
+ rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
1610
+ }),
1611
+ policy({
1612
+ rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
1613
+ }),
1614
+ hint2(
1615
+ "If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
1616
+ ),
1617
+ hint2(
1618
+ "When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
1619
+ ),
1620
+ hint2(
1621
+ "Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
1622
+ )
1623
+ ),
1624
+ fragment2(
1625
+ "aggregation_exactness",
1626
+ policy({
1627
+ rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
1628
+ }),
1629
+ policy({
1630
+ rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
1631
+ }),
1632
+ hint2(
1633
+ 'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
1634
+ )
1635
+ ),
1636
+ fragment2(
1637
+ "query_shape_examples",
1638
+ example({
1639
+ question: "List categories ordered by how many records belong to each category.",
1640
+ answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
1641
+ }),
1642
+ example({
1643
+ question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
1644
+ answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
1645
+ }),
1646
+ example({
1647
+ question: "List locations that have not hosted any event.",
1648
+ answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
1649
+ }),
1650
+ example({
1651
+ question: "List the most common category across records.",
1652
+ answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
1090
1653
  })
1091
- // policy({
1092
- // rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
1093
- // }),
1094
- // policy({
1095
- // rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
1096
- // }),
1097
- // policy({
1098
- // rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
1099
- // }),
1100
- // policy({
1101
- // rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
1102
- // }),
1103
- // policy({
1104
- // rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
1105
- // }),
1106
- // policy({
1107
- // rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
1108
- // }),
1109
- // policy({
1110
- // rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
1111
- // }),
1112
- // policy({
1113
- // rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
1114
- // }),
1115
- // hint('Preserve schema spelling exactly, including typos in column names.'),
1116
1654
  )
1117
- // fragment(
1118
- // 'unanswerable_gate',
1119
- // workflow({
1120
- // task: 'Unanswerable decision',
1121
- // steps: [
1122
- // 'Identify the core intent (metric/projection and required filters).',
1123
- // 'Attempt schema-grounded mapping for the core intent before considering error.',
1124
- // 'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
1125
- // 'Return unanswerable only when required information cannot be mapped to any available table or column.',
1126
- // ],
1127
- // }),
1128
- // policy({
1129
- // rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
1130
- // }),
1131
- // ),
1132
- // fragment(
1133
- // 'query_shape_preferences',
1134
- // hint(
1135
- // 'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
1136
- // ),
1137
- // hint(
1138
- // 'Prefer direct joins over dropping join constraints or using weaker alternatives.',
1139
- // ),
1140
- // hint('Use DISTINCT only when uniqueness is explicitly requested.'),
1141
- // hint(
1142
- // 'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
1143
- // ),
1144
- // hint(
1145
- // 'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
1146
- // ),
1147
- // hint(
1148
- // 'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
1149
- // ),
1150
- // hint(
1151
- // 'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
1152
- // ),
1153
- // hint(
1154
- // 'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
1155
- // ),
1156
- // hint(
1157
- // 'Use equality predicates for exact values unless the question asks for pattern matching.',
1158
- // ),
1159
- // hint(
1160
- // 'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
1161
- // ),
1162
- // ),
1163
- // fragment(
1164
- // 'sql_minimality',
1165
- // guardrail({
1166
- // rule: 'Never hallucinate tables or columns.',
1167
- // reason: 'Schema fidelity is required.',
1168
- // action: 'Use only available schema entities.',
1169
- // }),
1170
- // guardrail({
1171
- // rule: 'Prefer the minimal query over transformed expressions.',
1172
- // reason:
1173
- // 'Unnecessary transformations reduce correctness and add avoidable complexity.',
1174
- // action:
1175
- // 'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
1176
- // }),
1177
- // ),
1178
1655
  ];
1179
1656
  function extractSql(output) {
1180
1657
  const match = output.match(/```sql\n?([\s\S]*?)```/);
@@ -4338,17 +4815,17 @@ var TrackedFs = class {
4338
4815
  // packages/text2sql/src/lib/instructions.ts
4339
4816
  import {
4340
4817
  clarification,
4341
- example,
4818
+ example as example2,
4342
4819
  explain,
4343
4820
  fragment as fragment3,
4344
- guardrail,
4345
- hint as hint2,
4821
+ guardrail as guardrail2,
4822
+ hint as hint3,
4346
4823
  policy as policy2,
4347
4824
  principle,
4348
4825
  quirk,
4349
4826
  role,
4350
4827
  styleGuide,
4351
- workflow
4828
+ workflow as workflow2
4352
4829
  } from "@deepagents/context";
4353
4830
  function reasoningFramework() {
4354
4831
  return [
@@ -4357,7 +4834,7 @@ function reasoningFramework() {
4357
4834
  ),
4358
4835
  fragment3(
4359
4836
  "meta-cognitive-reasoning-framework",
4360
- hint2(
4837
+ hint3(
4361
4838
  "Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
4362
4839
  ),
4363
4840
  // 1) Logical dependencies and constraints
@@ -4498,7 +4975,7 @@ function guidelines(options = {}) {
4498
4975
  // Few-shot: Applying reasoning principles
4499
4976
  fragment3(
4500
4977
  "reasoning-examples",
4501
- example({
4978
+ example2({
4502
4979
  question: "Show me sales last month",
4503
4980
  answer: `Applying Principle 1 (Logical dependencies):
4504
4981
  - Need: schema to know which table has sales data
@@ -4510,7 +4987,7 @@ Applying Principle 5 (Information availability):
4510
4987
 
4511
4988
  Action: Ask user for date range clarification BEFORE generating SQL.`
4512
4989
  }),
4513
- example({
4990
+ example2({
4514
4991
  question: "Why did my query return no results?",
4515
4992
  answer: `Applying Principle 3 (Abductive reasoning):
4516
4993
  - Hypothesis 1 (most likely): Filter too restrictive
@@ -4524,7 +5001,7 @@ Testing hypotheses:
4524
5001
 
4525
5002
  Action: Start with most likely hypothesis, test incrementally. NEVER guess.`
4526
5003
  }),
4527
- example({
5004
+ example2({
4528
5005
  question: "Get me the top customers",
4529
5006
  answer: `Applying Principle 1 (Logical dependencies):
4530
5007
  - "Top" is ambiguous\u2014by revenue? by order count? by recency?
@@ -4538,10 +5015,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4538
5015
  // Schema adherence - consolidated into clear rules
4539
5016
  fragment3(
4540
5017
  "schema_adherence",
4541
- hint2(
5018
+ hint3(
4542
5019
  "Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
4543
5020
  ),
4544
- hint2(
5021
+ hint3(
4545
5022
  '"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
4546
5023
  )
4547
5024
  ),
@@ -4557,18 +5034,18 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4557
5034
  explanation: "Measures how closely the physical row order matches the logical sort order of the column. Values near 1 or -1 mean the data is well-ordered; near 0 means scattered",
4558
5035
  therefore: "High correlation means range queries (BETWEEN, >, <) on that column benefit from index scans. Low correlation means the index is less effective for ranges"
4559
5036
  }),
4560
- hint2(
5037
+ hint3(
4561
5038
  "When min/max stats are available, use them to validate filter values. If a user asks for values outside the known range, warn them the query may return no results."
4562
5039
  )
4563
5040
  ),
4564
5041
  // Joins - use relationship metadata
4565
- hint2(
5042
+ hint3(
4566
5043
  "Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
4567
5044
  ),
4568
5045
  // Aggregations - explain the concepts
4569
5046
  fragment3(
4570
5047
  "Aggregations",
4571
- hint2(
5048
+ hint3(
4572
5049
  "Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
4573
5050
  ),
4574
5051
  explain({
@@ -4593,7 +5070,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4593
5070
  issue: "NULL values behave unexpectedly in comparisons and aggregations",
4594
5071
  workaround: "Use IS NULL, IS NOT NULL, or COALESCE() to handle NULLs explicitly"
4595
5072
  }),
4596
- hint2(
5073
+ hint3(
4597
5074
  "Always include mentioned filters from joined tables in WHERE conditions."
4598
5075
  )
4599
5076
  ),
@@ -4608,22 +5085,22 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4608
5085
  // Safety guardrails - consolidated
4609
5086
  fragment3(
4610
5087
  "Query safety",
4611
- guardrail({
5088
+ guardrail2({
4612
5089
  rule: "Generate only valid, executable SELECT/WITH statements.",
4613
5090
  reason: "Read-only access prevents data modification.",
4614
5091
  action: "Never generate INSERT, UPDATE, DELETE, DROP, or DDL statements."
4615
5092
  }),
4616
- guardrail({
5093
+ guardrail2({
4617
5094
  rule: "Avoid unbounded scans and cartesian joins.",
4618
5095
  reason: "Protects performance and correctness.",
4619
5096
  action: "Apply filters on indexed columns. If join keys are unclear, ask for clarification."
4620
5097
  }),
4621
- guardrail({
5098
+ guardrail2({
4622
5099
  rule: "Preserve query semantics.",
4623
5100
  reason: "Arbitrary modifications change results.",
4624
5101
  action: 'Only add LIMIT for explicit "top N" requests. Add ORDER BY for deterministic results.'
4625
5102
  }),
4626
- guardrail({
5103
+ guardrail2({
4627
5104
  rule: "Seek clarification for genuine ambiguity.",
4628
5105
  reason: "Prevents incorrect assumptions.",
4629
5106
  action: "Ask a focused question before guessing."
@@ -4634,10 +5111,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4634
5111
  ask: "Clarify the ranking metric or definition.",
4635
5112
  reason: "Ensures correct aggregation and ordering."
4636
5113
  }),
4637
- hint2(
5114
+ hint3(
4638
5115
  'Use sample cell values from schema hints to match exact casing and format in WHERE conditions (e.g., "Male" vs "male" vs "M").'
4639
5116
  ),
4640
- workflow({
5117
+ workflow2({
4641
5118
  task: "SQL generation",
4642
5119
  steps: [
4643
5120
  "Schema linking: identify which tables and columns are mentioned or implied by the question.",
@@ -4649,7 +5126,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4649
5126
  "Verify: mentally translate SQL back to natural language. Does it match the original question?"
4650
5127
  ]
4651
5128
  }),
4652
- workflow({
5129
+ workflow2({
4653
5130
  task: "Error recovery",
4654
5131
  triggers: ["SQL error", "query failed", "execution error"],
4655
5132
  steps: [
@@ -4662,7 +5139,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4662
5139
  ],
4663
5140
  notes: "Maximum 3 retry attempts. If still failing, explain the issue to the user."
4664
5141
  }),
4665
- workflow({
5142
+ workflow2({
4666
5143
  task: "Complex query decomposition",
4667
5144
  triggers: [
4668
5145
  "multiple conditions",
@@ -4679,7 +5156,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4679
5156
  ],
4680
5157
  notes: "Complex questions often need CTEs (WITH clauses) for clarity and reusability."
4681
5158
  }),
4682
- workflow({
5159
+ workflow2({
4683
5160
  task: "Multi-turn context",
4684
5161
  triggers: ["follow-up", "and also", "what about", "same but", "instead"],
4685
5162
  steps: [
@@ -4694,7 +5171,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4694
5171
  }),
4695
5172
  fragment3(
4696
5173
  "Bash tool usage",
4697
- workflow({
5174
+ workflow2({
4698
5175
  task: "Query execution",
4699
5176
  steps: [
4700
5177
  'Execute SQL through bash tool: sql run "SELECT ..."',
@@ -4703,16 +5180,16 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4703
5180
  "For large results, slice first: cat <path> | jq '.[:10]'"
4704
5181
  ]
4705
5182
  }),
4706
- hint2(
5183
+ hint3(
4707
5184
  `You cannot access sql through a tool, it'll fail so the proper way to access it is through the bash tool using "sql run" and "sql validate" commands.`
4708
5185
  ),
4709
- hint2(
5186
+ hint3(
4710
5187
  "The sql command outputs: file path, column names (comma-separated), and row count. Use column names to construct precise jq queries."
4711
5188
  ),
4712
- hint2(
5189
+ hint3(
4713
5190
  'This is virtual bash environment and "sql" commands proxy to the database hence you cannot access sql files directly.'
4714
5191
  ),
4715
- hint2(
5192
+ hint3(
4716
5193
  "If a query fails, the sql command returns an error message in stderr."
4717
5194
  )
4718
5195
  )
@@ -4727,7 +5204,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
4727
5204
  );
4728
5205
  } else {
4729
5206
  baseTeachings.push(
4730
- hint2(
5207
+ hint3(
4731
5208
  'When a month, day, or time period is mentioned without a year (e.g., "in August", "on Monday"), assume ALL occurrences of that period in the data. Do not ask for year clarification.'
4732
5209
  )
4733
5210
  );