npm - @deepagents/text2sql - Versions diffs - 0.19.0 → 0.22.0 - Mend

@deepagents/text2sql 0.19.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +759 -137
package/dist/index.js.map +4 -4
package/dist/lib/agents/exceptions.d.ts +20 -0
package/dist/lib/agents/exceptions.d.ts.map +1 -0
package/dist/lib/agents/result-tools.d.ts.map +1 -1
package/dist/lib/agents/sql.agent.d.ts +0 -17
package/dist/lib/agents/sql.agent.d.ts.map +1 -1
package/dist/lib/synthesis/index.js +359 -103
package/dist/lib/synthesis/index.js.map +4 -4
package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
package/package.json +4 -4

package/dist/index.js CHANGED Viewed

@@ -511,6 +511,32 @@ var fragments = [
   hint("When validating user SQL, explain any errors clearly")
 ];
+// packages/text2sql/src/lib/agents/exceptions.ts
+// Module-private brands: each error class stamps its instances with its own
+// symbol so isInstance() can check more than the prototype chain.
+var sqlValidationMarker = Symbol("SQLValidationError");
+var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
+/**
+ * Error subclass named "SQLValidationError". Instances carry the
+ * sqlValidationMarker brand set to true.
+ */
+var SQLValidationError = class _SQLValidationError extends Error {
+  [sqlValidationMarker] = true;
+  constructor(message2) {
+    super(message2);
+    this.name = "SQLValidationError";
+  }
+  /** Returns true only for branded instances of this class. */
+  static isInstance(error) {
+    if (!(error instanceof _SQLValidationError)) {
+      return false;
+    }
+    return error[sqlValidationMarker] === true;
+  }
+};
+/**
+ * Error subclass named "UnanswerableSQLError". Instances carry the
+ * unanswerableSqlMarker brand set to true.
+ */
+var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
+  [unanswerableSqlMarker] = true;
+  constructor(message2) {
+    super(message2);
+    this.name = "UnanswerableSQLError";
+  }
+  /** Returns true only for branded instances of this class. */
+  static isInstance(error) {
+    if (!(error instanceof _UnanswerableSQLError)) {
+      return false;
+    }
+    return error[unanswerableSqlMarker] === true;
+  }
+};
 // packages/text2sql/src/lib/agents/result-tools.ts
 import { tool as tool2 } from "ai";
 import { createBashTool } from "bash-tool";
@@ -659,6 +685,14 @@ var BLOCKED_DB_CLIENT_COMMANDS = /* @__PURE__ */ new Set([
 ]);
 var BLOCKED_RAW_SQL_COMMANDS = /* @__PURE__ */ new Set(["select", "with"]);
 var ALLOWED_SQL_PROXY_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "validate"]);
+// Command names handled as shell interpreters by the blocked-command inspection.
+var SHELL_INTERPRETER_COMMANDS = /* @__PURE__ */ new Set(["bash", "sh", "zsh", "dash", "ksh"]);
+// Command names handled as wrappers: the command they run is resolved from
+// their arguments and inspected in turn.
+var WRAPPER_COMMANDS = /* @__PURE__ */ new Set([
+  "env",
+  "command",
+  "eval"
+]);
 var SQL_PROXY_ENFORCEMENT_MESSAGE = [
   "Direct database querying through bash is blocked.",
   "Use SQL proxy commands in this order:",
@@ -714,82 +748,94 @@ function isScriptNode(value) {
   const node = value;
   return node.type === "Script" && Array.isArray(node.statements);
 }
-function scriptContainsBlockedCommand(script, context) {
-  return statementsContainBlockedCommand(script.statements, context);
+function scriptContainsBlockedCommand(script, context, mode = "blocked-only") {
+  return statementsContainBlockedCommand(script.statements, context, mode);
 }
-function statementsContainBlockedCommand(statements, context) {
+function statementsContainBlockedCommand(statements, context, mode) {
   for (const statement of statements) {
-    if (statementContainsBlockedCommand(statement, context)) {
+    if (statementContainsBlockedCommand(statement, context, mode)) {
       return true;
     }
   }
   return false;
 }
-function statementContainsBlockedCommand(statement, context) {
+function statementContainsBlockedCommand(statement, context, mode) {
   for (const pipeline of statement.pipelines) {
-    if (pipelineContainsBlockedCommand(pipeline, context)) {
+    if (pipelineContainsBlockedCommand(pipeline, context, mode)) {
       return true;
     }
   }
   return false;
 }
-function pipelineContainsBlockedCommand(pipeline, context) {
-  for (const command of pipeline.commands) {
+function pipelineContainsBlockedCommand(pipeline, context, mode) {
+  for (const [index2, command] of pipeline.commands.entries()) {
     if (command.type === "FunctionDef") {
       context.functionDefinitions.set(command.name, command);
       continue;
     }
-    if (commandContainsBlockedCommand(command, context)) {
+    if (commandContainsBlockedCommand(command, context, mode, {
+      stdinFromPipe: index2 > 0
+    })) {
       return true;
     }
   }
   return false;
 }
-function stringCommandContainsBlockedCommand(command, context) {
+function stringCommandContainsBlockedCommand(command, context, mode = "blocked-only") {
   let script;
   try {
     script = parse(command);
   } catch {
     return false;
   }
-  return scriptContainsBlockedCommand(script, cloneInspectionContext(context));
+  return scriptContainsBlockedCommand(
+    script,
+    cloneInspectionContext(context),
+    mode
+  );
 }
-function wordContainsBlockedCommand(word, context) {
+function wordContainsBlockedCommand(word, context, mode) {
   if (!word) {
     return false;
   }
   return wordPartContainsBlockedCommand(
     word.parts,
-    context
+    context,
+    mode
   );
 }
-function wordPartContainsBlockedCommand(parts, context) {
+function wordPartContainsBlockedCommand(parts, context, mode) {
   for (const part of parts) {
-    if (partContainsBlockedCommand(part, context)) {
+    if (partContainsBlockedCommand(part, context, mode)) {
       return true;
     }
   }
   return false;
 }
-function partContainsBlockedCommand(node, context) {
+function partContainsBlockedCommand(node, context, mode) {
   const type = node.type;
   if (type === "CommandSubstitution" || type === "ProcessSubstitution") {
     if (isScriptNode(node.body)) {
       return scriptContainsBlockedCommand(
         node.body,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       );
     }
     return false;
   }
   if (type === "ArithCommandSubst" && typeof node.command === "string") {
-    return stringCommandContainsBlockedCommand(node.command, context);
+    return stringCommandContainsBlockedCommand(node.command, context, mode);
   }
   for (const value of Object.values(node)) {
     if (Array.isArray(value)) {
       for (const item of value) {
         if (typeof item === "object" && item !== null) {
-          if (partContainsBlockedCommand(item, context)) {
+          if (partContainsBlockedCommand(
+            item,
+            context,
+            mode
+          )) {
             return true;
           }
         }
@@ -797,14 +843,18 @@ function partContainsBlockedCommand(node, context) {
       continue;
     }
     if (typeof value === "object" && value !== null) {
-      if (partContainsBlockedCommand(value, context)) {
+      if (partContainsBlockedCommand(
+        value,
+        context,
+        mode
+      )) {
         return true;
       }
     }
   }
   return false;
 }
-function functionInvocationContainsBlockedCommand(functionName, context) {
+function functionInvocationContainsBlockedCommand(functionName, context, mode) {
   const definition = context.functionDefinitions.get(functionName);
   if (!definition) {
     return false;
@@ -814,52 +864,306 @@ function functionInvocationContainsBlockedCommand(functionName, context) {
   }
   const invocationContext = cloneInspectionContext(context);
   invocationContext.callStack.add(functionName);
-  return commandContainsBlockedCommand(definition.body, invocationContext);
+  return commandContainsBlockedCommand(
+    definition.body,
+    invocationContext,
+    mode,
+    { stdinFromPipe: false }
+  );
+}
+/**
+ * Reports whether the first UTF-16 code unit of `character` is A-Z or a-z.
+ * An empty string yields false (its char code is NaN).
+ */
+function isAsciiLetter(character) {
+  const code = character.charCodeAt(0);
+  const isUpperCase = code >= 65 && code <= 90;
+  const isLowerCase = code >= 97 && code <= 122;
+  return isUpperCase || isLowerCase;
+}
+/**
+ * Reports whether the first UTF-16 code unit of `character` is 0-9.
+ * An empty string yields false (its char code is NaN).
+ */
+function isAsciiDigit(character) {
+  const code = character.charCodeAt(0);
+  const atLeastZero = code >= 48;
+  const atMostNine = code <= 57;
+  return atLeastZero && atMostNine;
+}
+/**
+ * Checks that `name` is a non-empty identifier: an ASCII letter or "_"
+ * followed only by ASCII letters, digits, or "_".
+ */
+function isValidEnvVariableName(name) {
+  if (!name) {
+    return false;
+  }
+  // Same character classes as the per-character walk, expressed as one pattern.
+  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(name);
+}
+/**
+ * Reports whether `token` has the form NAME=value, where NAME (the text before
+ * the first "=") is non-empty and passes isValidEnvVariableName.
+ */
+function isEnvAssignmentToken(token) {
+  const equalsAt = token.indexOf("=");
+  // -1 means no "=", 0 means an empty name; neither is an assignment.
+  const hasName = equalsAt > 0;
+  return hasName ? isValidEnvVariableName(token.slice(0, equalsAt)) : false;
+}
+/**
+ * Parses a single-dash option cluster such as "-lc".
+ *
+ * Returns { valid, hasCommandFlag, hasStdinFlag, consumesNextArg }:
+ * - valid is false (and every flag false) unless the token is "-" followed by
+ *   one or more ASCII letters and does not start with "--";
+ * - hasCommandFlag is set by "c", hasStdinFlag by "s";
+ * - consumesNextArg is set by "o" or "O".
+ */
+function parseShortOptionCluster(option) {
+  // A fresh object per call so callers never share a result.
+  const rejected = () => ({
+    valid: false,
+    hasCommandFlag: false,
+    hasStdinFlag: false,
+    consumesNextArg: false
+  });
+  const looksLikeCluster = option.startsWith("-") && !option.startsWith("--") && option.length > 1;
+  if (!looksLikeCluster) {
+    return rejected();
+  }
+  const flags = {
+    valid: true,
+    hasCommandFlag: false,
+    hasStdinFlag: false,
+    consumesNextArg: false
+  };
+  for (const letter of option.slice(1)) {
+    if (!/^[A-Za-z]$/.test(letter)) {
+      return rejected();
+    }
+    switch (letter) {
+      case "c":
+        flags.hasCommandFlag = true;
+        break;
+      case "s":
+        flags.hasStdinFlag = true;
+        break;
+      case "O":
+      case "o":
+        flags.consumesNextArg = true;
+        break;
+      default:
+        break;
+    }
+  }
+  return flags;
+}
+/**
+ * Classifies how a shell interpreter invocation receives its program.
+ *
+ * @param args Argument words of the shell command (after the command name).
+ * @returns {{kind: string, payload: string|null}} one of:
+ *   - "command": a command flag was given; payload is the command string, or
+ *     null when it is missing or not static;
+ *   - "script":  the first operand names a script; payload is that operand;
+ *   - "stdin":   an "s" flag was seen and no operand followed;
+ *   - "none":    no program source was found in the arguments;
+ *   - "unknown": the arguments could not be interpreted safely.
+ *
+ * Shells read the command string from the first non-option argument, not from
+ * the word right after the flag. "-c -x 'cmd'" and "-oc errexit 'cmd'" both
+ * run cmd, so option scanning continues after the command flag and the first
+ * operand is returned as the payload.
+ */
+function getShellInvocationDescriptor(args) {
+  let readsFromStdin = false;
+  let wantsCommandString = false;
+  const longOptionsWithValue = /* @__PURE__ */ new Set(["--rcfile", "--init-file"]);
+  for (let index2 = 0; index2 < args.length; index2 += 1) {
+    const token = asStaticWordText(args[index2]);
+    if (token == null) {
+      // A dynamic word where the command string is expected keeps the
+      // "command with no usable payload" shape callers already reject.
+      return wantsCommandString ? { kind: "command", payload: null } : { kind: "unknown", payload: null };
+    }
+    if (token === "--") {
+      if (index2 + 1 >= args.length) {
+        break;
+      }
+      return {
+        kind: wantsCommandString ? "command" : "script",
+        payload: asStaticWordText(args[index2 + 1])
+      };
+    }
+    if (token === "--command") {
+      return {
+        kind: "command",
+        payload: asStaticWordText(args[index2 + 1])
+      };
+    }
+    if (token.startsWith("--command=")) {
+      return {
+        kind: "command",
+        payload: token.slice("--command=".length)
+      };
+    }
+    if (token.startsWith("--")) {
+      if (token.includes("=")) {
+        continue;
+      }
+      if (longOptionsWithValue.has(token)) {
+        if (index2 + 1 >= args.length) {
+          return { kind: "unknown", payload: null };
+        }
+        index2 += 1;
+      }
+      continue;
+    }
+    if (token.startsWith("-") && !token.startsWith("--")) {
+      const parsed = parseShortOptionCluster(token);
+      if (!parsed.valid) {
+        return { kind: "unknown", payload: null };
+      }
+      if (parsed.hasCommandFlag) {
+        // Remember the flag and keep scanning; later options may still
+        // precede the actual command string.
+        wantsCommandString = true;
+      }
+      if (parsed.hasStdinFlag) {
+        readsFromStdin = true;
+      }
+      if (parsed.consumesNextArg) {
+        if (index2 + 1 >= args.length) {
+          return { kind: "unknown", payload: null };
+        }
+        index2 += 1;
+      }
+      continue;
+    }
+    if (wantsCommandString && token.length > 1 && token.startsWith("+")) {
+      // "+x"-style words are options to the shell, so the command string is
+      // somewhere after them; refuse to guess.
+      return { kind: "unknown", payload: null };
+    }
+    return {
+      kind: wantsCommandString ? "command" : "script",
+      payload: token
+    };
+  }
+  if (wantsCommandString) {
+    // Command flag without a command string.
+    return { kind: "command", payload: null };
+  }
+  if (readsFromStdin) {
+    return { kind: "stdin", payload: null };
+  }
+  return { kind: "none", payload: null };
+}
+/**
+ * Collects the text of every here-document among `redirections`.
+ *
+ * Returns { hasHereDoc, payload }:
+ * - no here-document at all: { hasHereDoc: false, payload: null };
+ * - a here-document whose content is not static text: { hasHereDoc: true,
+ *   payload: null } (returned as soon as it is encountered);
+ * - otherwise the contents joined with "\n"; a here-document without content
+ *   contributes an empty string.
+ */
+function getHereDocPayload(redirections) {
+  const collected = [];
+  for (const { target } of redirections) {
+    if (target.type !== "HereDoc") {
+      continue;
+    }
+    const text = target.content ? asStaticWordText(target.content) : "";
+    if (text == null) {
+      return { hasHereDoc: true, payload: null };
+    }
+    collected.push(text);
+  }
+  return collected.length > 0 ? { hasHereDoc: true, payload: collected.join("\n") } : { hasHereDoc: false, payload: null };
+}
+/**
+ * Joins the static text of `words` with single spaces.
+ * Returns null as soon as a word has no static text (asStaticWordText yields
+ * null/undefined for it); an empty list yields "".
+ */
+function joinStaticWords(words) {
+  const pieces = [];
+  for (const word of words) {
+    const text = asStaticWordText(word);
+    if (text == null) {
+      return null;
+    }
+    pieces[pieces.length] = text;
+  }
+  const separator = " ";
+  return pieces.join(separator);
+}
+/**
+ * Finds the command that an `env ...` invocation would run.
+ *
+ * Skips env's own options and NAME=value assignments, then returns:
+ * - { kind: "resolved", name, args } with the first remaining word as the
+ *   command name and the rest as its arguments;
+ * - { kind: "none" } when nothing is left to run;
+ * - { kind: "unknown" } when the arguments cannot be interpreted safely: a
+ *   non-static word, a value-taking option without its value, or a
+ *   split-string option (-S / --split-string), whose argument env tokenises
+ *   into the command line itself.
+ *
+ * Value-taking short options (-u NAME, -C DIR, -a ARG), optionally preceded by
+ * value-less flags in the same cluster (e.g. "-iu NAME"), consume the
+ * following word; without this "env -C /tmp psql" would resolve "/tmp" as the
+ * command.
+ */
+function resolveEnvWrapperCommand(args) {
+  const longOptionsWithSeparateValue = /* @__PURE__ */ new Set(["--unset", "--chdir", "--argv0"]);
+  let index2 = 0;
+  while (index2 < args.length) {
+    const token = asStaticWordText(args[index2]);
+    if (token == null) {
+      return { kind: "unknown" };
+    }
+    if (token === "--") {
+      index2 += 1;
+      break;
+    }
+    if (token === "--split-string" || token.startsWith("--split-string=") || /^-[i0v]*S/.test(token)) {
+      // The real command is hidden inside the split string; fail closed.
+      return { kind: "unknown" };
+    }
+    if (longOptionsWithSeparateValue.has(token) || /^-[i0v]*[uCa]$/.test(token)) {
+      if (index2 + 1 >= args.length) {
+        return { kind: "unknown" };
+      }
+      index2 += 2;
+      continue;
+    }
+    if (token.startsWith("--unset=") || token.startsWith("--chdir=")) {
+      index2 += 1;
+      continue;
+    }
+    if (token.startsWith("-") && token !== "-" && !isEnvAssignmentToken(token)) {
+      index2 += 1;
+      continue;
+    }
+    if (isEnvAssignmentToken(token)) {
+      index2 += 1;
+      continue;
+    }
+    break;
+  }
+  if (index2 >= args.length) {
+    return { kind: "none" };
+  }
+  return {
+    kind: "resolved",
+    name: args[index2],
+    args: args.slice(index2 + 1)
+  };
+}
+/**
+ * Finds the command that a `command ...` invocation would run.
+ *
+ * Leading option words are skipped ("--" ends option scanning). Returns:
+ * - { kind: "unknown" } when a scanned word has no static text;
+ * - { kind: "none" } when -v or -V was given (lookup only) or no word remains;
+ * - { kind: "resolved", name, args } otherwise.
+ */
+function resolveCommandWrapperCommand(args) {
+  let cursor = 0;
+  let lookupOnly = false;
+  for (; cursor < args.length; cursor += 1) {
+    const token = asStaticWordText(args[cursor]);
+    if (token == null) {
+      return { kind: "unknown" };
+    }
+    if (token === "--") {
+      // Step past the terminator; `break` skips the loop's own increment.
+      cursor += 1;
+      break;
+    }
+    const isOption = token.startsWith("-") && token !== "-";
+    if (!isOption) {
+      break;
+    }
+    if (token === "-v" || token === "-V") {
+      lookupOnly = true;
+    }
+  }
+  const nothingToRun = lookupOnly || cursor >= args.length;
+  if (nothingToRun) {
+    return { kind: "none" };
+  }
+  const [name, ...rest] = args.slice(cursor);
+  return {
+    kind: "resolved",
+    name,
+    args: rest
+  };
 }
-function commandContainsBlockedCommand(command, context) {
+function commandContainsBlockedCommand(command, context, mode, options = { stdinFromPipe: false }) {
   switch (command.type) {
     case "SimpleCommand":
-      return isBlockedSimpleCommand(command, context);
+      return isBlockedSimpleCommand(command, context, mode, options);
     case "If":
       return command.clauses.some(
         (clause) => statementsContainBlockedCommand(
           clause.condition,
-          cloneInspectionContext(context)
+          cloneInspectionContext(context),
+          mode
         ) || statementsContainBlockedCommand(
           clause.body,
-          cloneInspectionContext(context)
+          cloneInspectionContext(context),
+          mode
         )
       ) || (command.elseBody ? statementsContainBlockedCommand(
         command.elseBody,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       ) : false);
     case "For":
     case "CStyleFor":
       return statementsContainBlockedCommand(
         command.body,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       );
     case "While":
     case "Until":
       return statementsContainBlockedCommand(
         command.condition,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       ) || statementsContainBlockedCommand(
         command.body,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       );
     case "Case":
       return command.items.some(
         (item) => statementsContainBlockedCommand(
           item.body,
-          cloneInspectionContext(context)
+          cloneInspectionContext(context),
+          mode
         )
       );
     case "Subshell":
     case "Group":
       return statementsContainBlockedCommand(
         command.body,
-        cloneInspectionContext(context)
+        cloneInspectionContext(context),
+        mode
       );
     case "FunctionDef":
       return false;
@@ -872,16 +1176,16 @@ function commandContainsBlockedCommand(command, context) {
     }
   }
 }
-function isBlockedSimpleCommand(command, context) {
-  if (wordContainsBlockedCommand(command.name, context)) {
+function isBlockedSimpleCommand(command, context, mode, options) {
+  if (wordContainsBlockedCommand(command.name, context, mode)) {
     return true;
   }
-  if (command.args.some((arg) => wordContainsBlockedCommand(arg, context))) {
+  if (command.args.some((arg) => wordContainsBlockedCommand(arg, context, mode))) {
     return true;
   }
   if (command.assignments.some(
-    (assignment) => wordContainsBlockedCommand(assignment.value, context) || (assignment.array?.some(
-      (value) => wordContainsBlockedCommand(value, context)
+    (assignment) => wordContainsBlockedCommand(assignment.value, context, mode) || (assignment.array?.some(
+      (value) => wordContainsBlockedCommand(value, context, mode)
     ) ?? false)
   )) {
     return true;
@@ -890,11 +1194,16 @@ function isBlockedSimpleCommand(command, context) {
     if (redirection.target.type === "Word") {
       return wordContainsBlockedCommand(
         redirection.target,
-        context
+        context,
+        mode
       );
     }
     if (redirection.target.type === "HereDoc" && redirection.target.content) {
-      return wordContainsBlockedCommand(redirection.target.content, context);
+      return wordContainsBlockedCommand(
+        redirection.target.content,
+        context,
+        mode
+      );
     }
     return false;
   })) {
@@ -913,9 +1222,92 @@ function isBlockedSimpleCommand(command, context) {
   }
   if (normalizedName === "sql") {
     const subcommand = asStaticWordText(command.args[0])?.toLowerCase();
-    return !subcommand || !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
+    if (!subcommand) {
+      return true;
+    }
+    if (mode === "block-all-sql") {
+      return true;
+    }
+    return !ALLOWED_SQL_PROXY_SUBCOMMANDS.has(subcommand);
+  }
+  const inspectWrappedCommand = (resolved) => {
+    if (resolved.kind === "none") {
+      return false;
+    }
+    if (resolved.kind === "unknown" || !resolved.name || !resolved.args) {
+      return true;
+    }
+    return isBlockedSimpleCommand(
+      {
+        name: resolved.name,
+        args: resolved.args,
+        assignments: [],
+        redirections: []
+      },
+      context,
+      "block-all-sql",
+      options
+    );
+  };
+  if (WRAPPER_COMMANDS.has(normalizedName)) {
+    if (normalizedName === "env") {
+      return inspectWrappedCommand(resolveEnvWrapperCommand(command.args));
+    }
+    if (normalizedName === "command") {
+      return inspectWrappedCommand(resolveCommandWrapperCommand(command.args));
+    }
+    const evalScript = joinStaticWords(command.args);
+    if (evalScript == null) {
+      return true;
+    }
+    if (!evalScript.trim()) {
+      return false;
+    }
+    return stringCommandContainsBlockedCommand(
+      evalScript,
+      context,
+      "block-all-sql"
+    );
+  }
+  if (SHELL_INTERPRETER_COMMANDS.has(normalizedName)) {
+    const shellInvocation = getShellInvocationDescriptor(command.args);
+    if (shellInvocation.kind === "unknown") {
+      return true;
+    }
+    if (shellInvocation.kind === "command") {
+      if (!shellInvocation.payload) {
+        return true;
+      }
+      if (stringCommandContainsBlockedCommand(
+        shellInvocation.payload,
+        context,
+        "block-all-sql"
+      )) {
+        return true;
+      }
+      return false;
+    }
+    const hereDoc = getHereDocPayload(command.redirections);
+    if (hereDoc.hasHereDoc) {
+      if (hereDoc.payload == null) {
+        return true;
+      }
+      if (hereDoc.payload.trim().length > 0 && stringCommandContainsBlockedCommand(
+        hereDoc.payload,
+        context,
+        "block-all-sql"
+      )) {
+        return true;
+      }
+    }
+    if (shellInvocation.kind === "script") {
+      return true;
+    }
+    if (options.stdinFromPipe || shellInvocation.kind === "stdin") {
+      return !hereDoc.hasHereDoc;
+    }
   }
-  if (functionInvocationContainsBlockedCommand(commandName, context)) {
+  if (functionInvocationContainsBlockedCommand(commandName, context, mode)) {
     return true;
   }
   return false;
@@ -1040,42 +1432,231 @@ import {
   defaultSettingsMiddleware,
   wrapLanguageModel
 } from "ai";
+import dedent2 from "dedent";
 import pRetry from "p-retry";
 import z4 from "zod";
 import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine2,
   InMemoryContextStore as InMemoryContextStore2,
+  example,
+  fragment as fragment2,
+  guardrail,
+  hint as hint2,
   persona as persona3,
+  policy,
   structuredOutput as structuredOutput2,
-  user as user2
+  user as user2,
+  workflow
 } from "@deepagents/context";
-var RETRY_TEMPERATURES = [0, 0.2, 0.3];
+var RETRY_TEMPERATURES = [0, 0.4, 0.8];
+// Role and objective strings for the SQL agent; toSql passes them to
+// persona3() as `role` and `objective`.
+var SQL_AGENT_ROLE = "Expert SQL query generator.";
+var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
+// Prompt fragments that toSql spreads into the context right after the persona
+// and before the caller-supplied fragments. Each entry is a named fragment
+// built from the policy/hint/guardrail/workflow/example helpers imported from
+// "@deepagents/context". The strings are prompt text consumed at runtime.
+var SQL_AGENT_POLICIES = [
+  fragment2(
+    "schema_mapping",
+    policy({
+      rule: "Translate natural language into precise SQL grounded in available schema entities."
+    }),
+    hint2("Preserve schema spelling exactly, including typos in column names.")
+  ),
+  fragment2(
+    "projection_minimality",
+    policy({
+      rule: "Return only columns requested by the question; do not add helper columns unless explicitly requested."
+    }),
+    policy({
+      rule: 'For requests of the form "X sorted/ordered by Y", project X only unless Y is explicitly requested as an output field.'
+    }),
+    policy({
+      rule: "Prefer selecting schema columns directly without derived expressions when direct selection answers the request."
+    }),
+    hint2(
+      "Do not include ORDER BY, GROUP BY, or JOIN helper columns in SELECT output unless the question explicitly asks for them."
+    ),
+    policy({
+      rule: "Use DISTINCT only when uniqueness is explicitly requested (for example distinct/unique/different/no duplicates)."
+    }),
+    hint2(
+      'Do not infer DISTINCT from generic wording such as "some", plural nouns, or entity-set phrasing; for transactional/attendance-style tables, default to raw rows unless uniqueness is explicitly requested.'
+    )
+  ),
+  fragment2(
+    "date_transform_safety",
+    policy({
+      rule: "Do not assume VARCHAR/TEXT values are parseable dates. Avoid date extraction functions on text columns by default."
+    }),
+    policy({
+      rule: "Use date-part extraction only when both conditions hold: the question explicitly asks for transformation and schema values require transformation to produce that unit."
+    }),
+    hint2(
+      "Do not apply SUBSTR, STRFTIME, DATE_PART, YEAR, or similar extraction functions unless the question explicitly asks for transformation and schema values require it."
+    ),
+    hint2(
+      "If a column already represents the requested concept (for example a stored year-like value), use the column as-is."
+    )
+  ),
+  fragment2(
+    "sql_minimality",
+    guardrail({
+      rule: "Never hallucinate tables or columns.",
+      reason: "Schema fidelity is required.",
+      action: "Use only available schema entities."
+    }),
+    guardrail({
+      rule: "Avoid unnecessary transformations and derived projections.",
+      reason: "Extra transformations frequently change semantics and reduce correctness.",
+      action: "Do not add date parsing, substring extraction, or derived columns unless explicitly required by the question or schema."
+    })
+  ),
+  fragment2(
+    "preflight_checklist",
+    workflow({
+      task: "Final SQL preflight before returning output",
+      steps: [
+        "Verify selected columns match the question and remove unrequested helper projections.",
+        "If aggregate values are used only for ranking/filtering, keep them out of SELECT unless explicitly requested.",
+        "Prefer raw schema columns over derived expressions when raw columns already satisfy the request.",
+        "If a candidate query uses STRFTIME, SUBSTR, DATE_PART, YEAR, or similar extraction on text-like columns, remove that transformation unless explicitly required by the question.",
+        "Return only schema-grounded SQL using existing tables and columns."
+      ]
+    })
+  ),
+  fragment2(
+    "set_semantics",
+    policy({
+      rule: "For questions asking where both condition A and condition B hold over an attribute, compute the intersection of qualifying sets for that attribute."
+    }),
+    policy({
+      rule: "Do not force the same entity instance to satisfy both conditions unless the question explicitly requests the same person/row/entity."
+    }),
+    hint2(
+      "Prefer INTERSECT (or logically equivalent set-based shape) over requiring the same physical row/entity to satisfy both conditions unless explicitly requested."
+    ),
+    hint2(
+      "When two conditions describe different row groups whose shared attribute is requested, build each group separately and intersect the attribute values."
+    ),
+    hint2(
+      "Do not collapse cross-group conditions into a single-row AND predicate when the intent is shared values across groups."
+    ),
+    policy({
+      rule: "If two predicates on the same field cannot both be true for one row, do not combine them with AND; use set operations across separate filtered subsets when shared values are requested."
+    })
+  ),
+  fragment2(
+    "predicate_column_alignment",
+    policy({
+      rule: "Match literal values to semantically compatible columns. Do not compare descriptive names to identifier columns."
+    }),
+    hint2(
+      "When a filter value is a descriptive label (for example a department name), join through the lookup table and filter on its name/title column, not on *_id columns."
+    ),
+    hint2(
+      "When relation roles are explicit in wording (for example host/home/source/destination), prefer foreign keys with matching role qualifiers over generic similarly named columns."
+    ),
+    policy({
+      rule: "When multiple foreign-key candidates exist, select the column whose qualifier best matches the relationship described in the question."
+    }),
+    policy({
+      rule: "For hosting/held semantics, prefer host_* relationship columns when available over generic *_id alternatives."
+    }),
+    hint2(
+      'Interpret wording like "held/hosted a competition or event" as a hosting relationship and map to host_* foreign keys when present.'
+    ),
+    policy({
+      rule: "Do not compare descriptive labels or names to *_id columns; join to the table containing the descriptive field and filter there."
+    }),
+    policy({
+      rule: "Keep numeric identifiers unquoted when used as numeric equality filters unless schema indicates text identifiers."
+    }),
+    policy({
+      rule: "When filtering by a descriptive label value and a related table exposes a corresponding *_name or title column, join to that table and filter on the descriptive column."
+    })
+  ),
+  fragment2(
+    "ordering_semantics",
+    policy({
+      rule: "Respect explicit sort direction terms. If direction is not specified, use ascending order unless a superlative intent (most/least/highest/lowest) implies direction."
+    }),
+    policy({
+      rule: "When ranking categories by frequency, use COUNT for ordering but keep output focused on requested category fields unless counts are explicitly requested."
+    }),
+    policy({
+      rule: "Do not use DESC unless descending direction is explicit or a superlative intent requires descending ranking."
+    }),
+    policy({
+      rule: 'For "most common/frequent <attribute>" requests, return the attribute value(s) only; use counts only for ordering/filtering unless the question explicitly asks to return counts.'
+    }),
+    hint2(
+      'Use DESC with LIMIT 1 for "most/highest/largest"; use ASC with LIMIT 1 for "least/lowest/smallest".'
+    )
+  ),
+  fragment2(
+    "negative_membership_queries",
+    policy({
+      rule: "For requests asking entities that did not participate/host/appear in related records, prefer NOT IN or NOT EXISTS against the related foreign-key set."
+    }),
+    hint2(
+      "Map role-bearing relationship columns carefully (for example host_* foreign keys for hosting relationships) instead of generic IDs when role wording is explicit."
+    ),
+    hint2(
+      'For "never had/never exceeded" conditions over history tables, exclude entities via NOT IN/NOT EXISTS against the disqualifying entity-id set (often built with GROUP BY/HAVING MAX(...)).'
+    )
+  ),
+  fragment2(
+    "join_completeness",
+    policy({
+      rule: "Preserve entity-restricting joins implied by the question. Do not widen results by querying only a broader attribute table when a subset entity table is available."
+    }),
+    policy({
+      rule: "If an entity term in the question maps to a table, keep that table in query scope and join to attribute tables rather than dropping the entity table."
+    }),
+    hint2(
+      "If the question targets a specific entity group, include that entity table and its join conditions even when selected columns come from a related table."
+    ),
+    hint2(
+      "When the question names an entity type and a relation table links to that entity via *_id, include the entity table in scope instead of counting only relation rows."
+    ),
+    hint2(
+      "Prefer INNER JOIN by default; use LEFT JOIN only when the question explicitly requests including unmatched rows or zero-related entities."
+    )
+  ),
+  fragment2(
+    "aggregation_exactness",
+    policy({
+      rule: "Preserve requested aggregation semantics exactly: use COUNT(*) by default for total rows, use COUNT(DISTINCT ...) only when uniqueness is explicitly requested, and group by stable entity keys when computing per-entity aggregates."
+    }),
+    policy({
+      rule: "For questions asking which entity has lowest/highest average of a metric, compute AVG(metric) per entity (GROUP BY entity) and rank those aggregates."
+    }),
+    hint2(
+      'For "how many <entities>" questions over relation records, default to COUNT(*) on qualifying rows unless explicit uniqueness language is present.'
+    )
+  ),
+  fragment2(
+    "query_shape_examples",
+    example({
+      question: "List categories ordered by how many records belong to each category.",
+      answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*)"
+    }),
+    example({
+      question: "Show labels shared by rows with metric > 100 and rows with metric < 10.",
+      answer: "SELECT label FROM records WHERE metric > 100 INTERSECT SELECT label FROM records WHERE metric < 10"
+    }),
+    example({
+      question: "List locations that have not hosted any event.",
+      answer: "SELECT location_name FROM locations WHERE location_id NOT IN (SELECT host_location_id FROM events)"
+    }),
+    example({
+      question: "List the most common category across records.",
+      answer: "SELECT category FROM records GROUP BY category ORDER BY COUNT(*) DESC LIMIT 1"
+    })
+  )
+];
 /**
  * Returns the body of the first ```sql fenced block in `output`, trimmed.
  * When no such block exists, the whole output is returned trimmed.
  */
 function extractSql(output) {
   const fenced = /```sql\n?([\s\S]*?)```/.exec(output);
   if (fenced) {
     return fenced[1].trim();
   }
   return output.trim();
 }
-var marker = Symbol("SQLValidationError");
-var SQLValidationError = class _SQLValidationError extends Error {
-  [marker];
-  constructor(message2) {
-    super(message2);
-    this.name = "SQLValidationError";
-    this[marker] = true;
-  }
-  static isInstance(error) {
-    return error instanceof _SQLValidationError && error[marker] === true;
-  }
-};
-var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
-  constructor(message2) {
-    super(message2);
-    this.name = "UnanswerableSQLError";
-  }
-  static isInstance(error) {
-    return error instanceof _UnanswerableSQLError;
-  }
-};
 async function toSql(options) {
   const { maxRetries = 3 } = options;
   return withRetry(
@@ -1088,20 +1669,38 @@ async function toSql(options) {
       context.set(
         persona3({
           name: "Freya",
-          role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
-          objective: "Translate natural language questions into precise, efficient SQL queries"
+          role: SQL_AGENT_ROLE,
+          objective: SQL_AGENT_OBJECTIVE
+          // role: `You are a data science expert that provides well-reasoned and detailed responses.`,
+          // objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
         }),
+        ...SQL_AGENT_POLICIES,
         ...options.fragments
       );
       if (errors.length) {
+        const lastError = errors.at(-1);
         context.set(
-          user2(options.input),
-          user2(
-            `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
+          user2(dedent2`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`),
+          UnanswerableSQLError.isInstance(lastError) ? user2(
+            `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
+          ) : user2(
+            `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
           )
         );
       } else {
-        context.set(user2(options.input));
+        context.set(
+          user2(dedent2`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`)
+        );
       }
       const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
       const baseModel = options.model ?? groq2("openai/gpt-oss-20b");
@@ -1127,19 +1726,45 @@ async function toSql(options) {
         })
       });
       const { result: output } = await sqlOutput.generate();
+      /**
+       * Turns raw model SQL into the value returned for this attempt.
+       *
+       * The SQL text is extracted with extractSql, formatted by the adapter and
+       * then validated by the adapter.
+       *
+       * @param {string} rawSql - SQL text as produced by the model.
+       * @returns {Promise<object>} `{ attempts, sql, errors }`, where `errors` is
+       *   the formatted list of earlier errors or undefined when there were none.
+       * @throws {SQLValidationError} When adapter.validate returns a truthy value.
+       */
+      const finalizeSql = async (rawSql) => {
+        const extracted = extractSql(rawSql);
+        const sql = options.adapter.format(extracted);
+        const problem = await options.adapter.validate(sql);
+        if (problem) {
+          throw new SQLValidationError(problem);
+        }
+        const priorErrors = errors.length ? errors.map(formatErrorMessage) : undefined;
+        return { attempts, sql, errors: priorErrors };
+      };
       if ("error" in output) {
-        throw new UnanswerableSQLError(output.error);
-      }
-      const sql = options.adapter.format(extractSql(output.sql));
-      const validationError = await options.adapter.validate(sql);
-      if (validationError) {
-        throw new SQLValidationError(validationError);
+        context.set(
+          user2(
+            "<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
+          )
+        );
+        const forcedSqlOutput = structuredOutput2({
+          model,
+          context,
+          schema: z4.object({
+            sql: z4.string().describe(
+              "Best-effort SQL query that answers the core intent using only available schema entities."
+            ),
+            reasoning: z4.string().describe("Reasoning steps for best-effort schema mapping.")
+          })
+        });
+        try {
+          // The primary attempt reads its payload from the `result` key of the
+          // object generate() resolves to. Reading `.sql` directly off that
+          // object gives undefined, which makes extractSql throw and turns every
+          // fallback into an UnanswerableSQLError.
+          // NOTE(review): assumes forcedSqlOutput.generate() resolves to the same
+          // `{ result }` shape as sqlOutput.generate() - confirm.
+          const { result: forced } = await forcedSqlOutput.generate();
+          return await finalizeSql(forced.sql);
+        } catch (error) {
+          // Validation and model/SDK errors are rethrown untouched.
+          if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
+            throw error;
+          }
+          // Any other failure is reported with the model's original
+          // "unanswerable" reason; the underlying failure is kept as `cause`
+          // so it is not silently discarded.
+          const unanswerable = new UnanswerableSQLError(output.error);
+          unanswerable.cause = error;
+          throw unanswerable;
+        }
       }
-      return {
-        attempts,
-        sql,
-        errors: errors.length ? errors.map(formatErrorMessage) : void 0
-      };
+      return await finalizeSql(output.sql);
     },
     { retries: maxRetries - 1 }
   );
@@ -1202,9 +1827,6 @@ async function withRetry(computation, options = { retries: 3 }) {
         return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
       },
       onFailedAttempt(context) {
-        console.log(
-          `Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
-        );
         errors.push(context.error);
       }
     }
@@ -1213,7 +1835,7 @@ async function withRetry(computation, options = { retries: 3 }) {
 // packages/text2sql/src/lib/agents/suggestions.agents.ts
 import { groq as groq3 } from "@ai-sdk/groq";
-import dedent2 from "dedent";
+import dedent3 from "dedent";
 import z5 from "zod";
 import { agent, thirdPersonPrompt } from "@deepagents/agent";
 var suggestionsAgent = agent({
@@ -1229,7 +1851,7 @@ var suggestionsAgent = agent({
     ).min(1).max(5).describe("A set of up to two advanced question + SQL pairs.")
   }),
   prompt: (state) => {
-    return dedent2`
+    return dedent3`
       ${thirdPersonPrompt()}
       <identity>
@@ -4193,26 +4815,26 @@ var TrackedFs = class {
 // packages/text2sql/src/lib/instructions.ts
 import {
   clarification,
-  example,
+  example as example2,
   explain,
-  fragment as fragment2,
-  guardrail,
-  hint as hint2,
-  policy,
+  fragment as fragment3,
+  guardrail as guardrail2,
+  hint as hint3,
+  policy as policy2,
   principle,
   quirk,
   role,
   styleGuide,
-  workflow
+  workflow as workflow2
 } from "@deepagents/context";
 function reasoningFramework() {
   return [
     role(
       "You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses."
     ),
-    fragment2(
+    fragment3(
       "meta-cognitive-reasoning-framework",
-      hint2(
+      hint3(
         "Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
       ),
       // 1) Logical dependencies and constraints
@@ -4220,19 +4842,19 @@ function reasoningFramework() {
         title: "Logical dependencies and constraints",
         description: "Analyze the intended action against the following factors. Resolve conflicts in order of importance:",
         policies: [
-          policy({
+          policy2({
             rule: "Policy-based rules, mandatory prerequisites, and constraints."
           }),
-          policy({
+          policy2({
             rule: "Order of operations: Ensure taking an action does not prevent a subsequent necessary action.",
             policies: [
               "The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task."
             ]
           }),
-          policy({
+          policy2({
             rule: "Other prerequisites (information and/or actions needed)."
           }),
-          policy({ rule: "Explicit user constraints or preferences." })
+          policy2({ rule: "Explicit user constraints or preferences." })
         ]
       }),
       // 2) Risk assessment
@@ -4285,17 +4907,17 @@ function reasoningFramework() {
         title: "Completeness",
         description: "Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.",
         policies: [
-          policy({
+          policy2({
             rule: "Resolve conflicts using the order of importance in #1."
           }),
-          policy({
+          policy2({
             rule: "Avoid premature conclusions: There may be multiple relevant options for a given situation.",
             policies: [
               "To check for whether an option is relevant, reason about all information sources from #5.",
               "You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking."
             ]
           }),
-          policy({
+          policy2({
             rule: "Review applicable sources of information from #5 to confirm which are relevant to the current state."
           })
         ]
@@ -4327,33 +4949,33 @@ function guidelines(options = {}) {
     // Include the meta-cognitive reasoning framework
     ...reasoningFramework(),
     // Prerequisite policies (must do X before Y)
-    fragment2(
+    fragment3(
       "prerequisite_policies",
-      policy({
+      policy2({
         rule: "YOU MUST inspect schema structure and available tables",
         before: "generating ANY SQL query",
         reason: "NEVER generate SQL without knowing valid tables, columns, and relationships"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST resolve ambiguous business terms with the user",
         before: "making ANY assumptions about terminology meaning",
         reason: "NEVER guess domain-specific language\u2014ask for clarification"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST validate SQL syntax",
         before: "executing ANY query against the database",
         reason: "NEVER execute unvalidated queries"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST complete ALL reasoning steps",
         before: "taking ANY tool call or response action",
         reason: "Once an action is taken, it CANNOT be undone. NO EXCEPTIONS."
       })
     ),
     // Few-shot: Applying reasoning principles
-    fragment2(
+    fragment3(
       "reasoning-examples",
-      example({
+      example2({
         question: "Show me sales last month",
         answer: `Applying Principle 1 (Logical dependencies):
 - Need: schema to know which table has sales data
@@ -4365,7 +4987,7 @@ Applying Principle 5 (Information availability):
 Action: Ask user for date range clarification BEFORE generating SQL.`
       }),
-      example({
+      example2({
         question: "Why did my query return no results?",
         answer: `Applying Principle 3 (Abductive reasoning):
 - Hypothesis 1 (most likely): Filter too restrictive
@@ -4379,7 +5001,7 @@ Testing hypotheses:
 Action: Start with most likely hypothesis, test incrementally. NEVER guess.`
       }),
-      example({
+      example2({
         question: "Get me the top customers",
         answer: `Applying Principle 1 (Logical dependencies):
 - "Top" is ambiguous\u2014by revenue? by order count? by recency?
@@ -4391,16 +5013,16 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       })
     ),
     // Schema adherence - consolidated into clear rules
-    fragment2(
+    fragment3(
       "schema_adherence",
-      hint2(
+      hint3(
         "Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
       ),
-      hint2(
+      hint3(
         '"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
       )
     ),
-    fragment2(
+    fragment3(
       "Column statistics",
       explain({
         concept: "nDistinct in column stats",
@@ -4412,18 +5034,18 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
         explanation: "Measures how closely the physical row order matches the logical sort order of the column. Values near 1 or -1 mean the data is well-ordered; near 0 means scattered",
         therefore: "High correlation means range queries (BETWEEN, >, <) on that column benefit from index scans. Low correlation means the index is less effective for ranges"
       }),
-      hint2(
+      hint3(
         "When min/max stats are available, use them to validate filter values. If a user asks for values outside the known range, warn them the query may return no results."
       )
     ),
     // Joins - use relationship metadata
-    hint2(
+    hint3(
       "Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
     ),
     // Aggregations - explain the concepts
-    fragment2(
+    fragment3(
       "Aggregations",
-      hint2(
+      hint3(
         "Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
       ),
       explain({
@@ -4433,7 +5055,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       })
     ),
     // Query semantics - explain concepts and document quirks
-    fragment2(
+    fragment3(
       "Query interpretation",
       explain({
         concept: "threshold language",
@@ -4448,7 +5070,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
         issue: "NULL values behave unexpectedly in comparisons and aggregations",
         workaround: "Use IS NULL, IS NOT NULL, or COALESCE() to handle NULLs explicitly"
       }),
-      hint2(
+      hint3(
         "Always include mentioned filters from joined tables in WHERE conditions."
       )
     ),
@@ -4461,24 +5083,24 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       prefer: "Concise, business-friendly summaries with key comparisons and helpful follow-ups."
     }),
     // Safety guardrails - consolidated
-    fragment2(
+    fragment3(
       "Query safety",
-      guardrail({
+      guardrail2({
         rule: "Generate only valid, executable SELECT/WITH statements.",
         reason: "Read-only access prevents data modification.",
         action: "Never generate INSERT, UPDATE, DELETE, DROP, or DDL statements."
       }),
-      guardrail({
+      guardrail2({
         rule: "Avoid unbounded scans and cartesian joins.",
         reason: "Protects performance and correctness.",
         action: "Apply filters on indexed columns. If join keys are unclear, ask for clarification."
       }),
-      guardrail({
+      guardrail2({
         rule: "Preserve query semantics.",
         reason: "Arbitrary modifications change results.",
         action: 'Only add LIMIT for explicit "top N" requests. Add ORDER BY for deterministic results.'
       }),
-      guardrail({
+      guardrail2({
         rule: "Seek clarification for genuine ambiguity.",
         reason: "Prevents incorrect assumptions.",
         action: "Ask a focused question before guessing."
@@ -4489,10 +5111,10 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       ask: "Clarify the ranking metric or definition.",
       reason: "Ensures correct aggregation and ordering."
     }),
-    hint2(
+    hint3(
       'Use sample cell values from schema hints to match exact casing and format in WHERE conditions (e.g., "Male" vs "male" vs "M").'
     ),
-    workflow({
+    workflow2({
       task: "SQL generation",
       steps: [
         "Schema linking: identify which tables and columns are mentioned or implied by the question.",
@@ -4504,7 +5126,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
         "Verify: mentally translate SQL back to natural language. Does it match the original question?"
       ]
     }),
-    workflow({
+    workflow2({
       task: "Error recovery",
       triggers: ["SQL error", "query failed", "execution error"],
       steps: [
@@ -4517,7 +5139,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       ],
       notes: "Maximum 3 retry attempts. If still failing, explain the issue to the user."
     }),
-    workflow({
+    workflow2({
       task: "Complex query decomposition",
       triggers: [
         "multiple conditions",
@@ -4534,7 +5156,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       ],
       notes: "Complex questions often need CTEs (WITH clauses) for clarity and reusability."
     }),
-    workflow({
+    workflow2({
       task: "Multi-turn context",
       triggers: ["follow-up", "and also", "what about", "same but", "instead"],
       steps: [
@@ -4547,9 +5169,9 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       ],
       notes: "If reference is ambiguous, ask which previous result or entity the user means."
     }),
-    fragment2(
+    fragment3(
       "Bash tool usage",
-      workflow({
+      workflow2({
         task: "Query execution",
         steps: [
           'Execute SQL through bash tool: sql run "SELECT ..."',
@@ -4558,16 +5180,16 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
           "For large results, slice first: cat <path> | jq '.[:10]'"
         ]
       }),
-      hint2(
+      hint3(
         `You cannot access sql through a tool, it'll fail so the proper way to access it is through the bash tool using "sql run" and "sql validate" commands.`
       ),
-      hint2(
+      hint3(
         "The sql command outputs: file path, column names (comma-separated), and row count. Use column names to construct precise jq queries."
       ),
-      hint2(
+      hint3(
         'This is virtual bash environment and "sql" commands proxy to the database hence you cannot access sql files directly.'
       ),
-      hint2(
+      hint3(
         "If a query fails, the sql command returns an error message in stderr."
       )
     )
@@ -4582,7 +5204,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
     );
   } else {
     baseTeachings.push(
-      hint2(
+      hint3(
         'When a month, day, or time period is mentioned without a year (e.g., "in August", "on Monday"), assume ALL occurrences of that period in the data. Do not ask for year clarification.'
       )
     );