npm - @deepagents/text2sql - Versions diffs - 0.19.0 → 0.20.0 - Mend

@deepagents/text2sql 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +210 -65
package/dist/index.js.map +4 -4
package/dist/lib/agents/exceptions.d.ts +20 -0
package/dist/lib/agents/exceptions.d.ts.map +1 -0
package/dist/lib/agents/sql.agent.d.ts +0 -17
package/dist/lib/agents/sql.agent.d.ts.map +1 -1
package/dist/lib/synthesis/index.js +224 -79
package/dist/lib/synthesis/index.js.map +4 -4
package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
package/package.json +4 -4

package/dist/index.d.ts CHANGED

@@ -1,5 +1,6 @@
 export * from './lib/adapters/adapter.ts';
 export * from './lib/agents/developer.agent.ts';
+export * from './lib/agents/exceptions.ts';
 export * from './lib/agents/result-tools.ts';
 export * from './lib/agents/sql.agent.ts';
 export * from './lib/agents/suggestions.agents.ts';

package/dist/index.d.ts.map CHANGED

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAC;AAC1C,cAAc,iCAAiC,CAAC;AAChD,cAAc,8BAA8B,CAAC;AAC7C,cAAc,2BAA2B,CAAC;AAC1C,cAAc,oCAAoC,CAAC;AACnD,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,mBAAmB,CAAC;AAClC,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,2BAA2B,CAAC;AAC1C,cAAc,iCAAiC,CAAC;AAChD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,2BAA2B,CAAC;AAC1C,cAAc,oCAAoC,CAAC;AACnD,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,mBAAmB,CAAC;AAClC,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC"}

package/dist/index.js CHANGED

@@ -511,6 +511,32 @@ var fragments = [
   hint("When validating user SQL, explain any errors clearly")
 ];
+// packages/text2sql/src/lib/agents/exceptions.ts
+var sqlValidationMarker = Symbol("SQLValidationError");
+var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
+var SQLValidationError = class _SQLValidationError extends Error {
+  [sqlValidationMarker];
+  constructor(message2) {
+    super(message2);
+    this.name = "SQLValidationError";
+    this[sqlValidationMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
+  }
+};
+var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
+  [unanswerableSqlMarker];
+  constructor(message2) {
+    super(message2);
+    this.name = "UnanswerableSQLError";
+    this[unanswerableSqlMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
+  }
+};
 // packages/text2sql/src/lib/agents/result-tools.ts
 import { tool as tool2 } from "ai";
 import { createBashTool } from "bash-tool";
@@ -1040,42 +1066,120 @@ import {
   defaultSettingsMiddleware,
   wrapLanguageModel
 } from "ai";
+import dedent2 from "dedent";
 import pRetry from "p-retry";
 import z4 from "zod";
 import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine2,
   InMemoryContextStore as InMemoryContextStore2,
+  fragment as fragment2,
   persona as persona3,
+  policy,
   structuredOutput as structuredOutput2,
   user as user2
 } from "@deepagents/context";
 var RETRY_TEMPERATURES = [0, 0.2, 0.3];
+var SQL_AGENT_ROLE = "Expert SQL query generator.";
+var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
+var SQL_AGENT_POLICIES = [
+  fragment2(
+    "schema_mapping",
+    policy({
+      rule: "Translate natural language into precise SQL grounded in available schema entities."
+    })
+    // policy({
+    //   rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
+    // }),
+    // policy({
+    //   rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
+    // }),
+    // policy({
+    //   rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
+    // }),
+    // policy({
+    //   rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
+    // }),
+    // policy({
+    //   rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
+    // }),
+    // policy({
+    //   rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
+    // }),
+    // policy({
+    //   rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
+    // }),
+    // policy({
+    //   rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
+    // }),
+    // hint('Preserve schema spelling exactly, including typos in column names.'),
+  )
+  // fragment(
+  //   'unanswerable_gate',
+  //   workflow({
+  //     task: 'Unanswerable decision',
+  //     steps: [
+  //       'Identify the core intent (metric/projection and required filters).',
+  //       'Attempt schema-grounded mapping for the core intent before considering error.',
+  //       'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
+  //       'Return unanswerable only when required information cannot be mapped to any available table or column.',
+  //     ],
+  //   }),
+  //   policy({
+  //     rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
+  //   }),
+  // ),
+  // fragment(
+  //   'query_shape_preferences',
+  //   hint(
+  //     'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
+  //   ),
+  //   hint(
+  //     'Prefer direct joins over dropping join constraints or using weaker alternatives.',
+  //   ),
+  //   hint('Use DISTINCT only when uniqueness is explicitly requested.'),
+  //   hint(
+  //     'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
+  //   ),
+  //   hint(
+  //     'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
+  //   ),
+  //   hint(
+  //     'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
+  //   ),
+  //   hint(
+  //     'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
+  //   ),
+  //   hint(
+  //     'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
+  //   ),
+  //   hint(
+  //     'Use equality predicates for exact values unless the question asks for pattern matching.',
+  //   ),
+  //   hint(
+  //     'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
+  //   ),
+  // ),
+  // fragment(
+  //   'sql_minimality',
+  //   guardrail({
+  //     rule: 'Never hallucinate tables or columns.',
+  //     reason: 'Schema fidelity is required.',
+  //     action: 'Use only available schema entities.',
+  //   }),
+  //   guardrail({
+  //     rule: 'Prefer the minimal query over transformed expressions.',
+  //     reason:
+  //       'Unnecessary transformations reduce correctness and add avoidable complexity.',
+  //     action:
+  //       'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
+  //   }),
+  // ),
+];
 function extractSql(output) {
   const match = output.match(/```sql\n?([\s\S]*?)```/);
   return match ? match[1].trim() : output.trim();
 }
-var marker = Symbol("SQLValidationError");
-var SQLValidationError = class _SQLValidationError extends Error {
-  [marker];
-  constructor(message2) {
-    super(message2);
-    this.name = "SQLValidationError";
-    this[marker] = true;
-  }
-  static isInstance(error) {
-    return error instanceof _SQLValidationError && error[marker] === true;
-  }
-};
-var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
-  constructor(message2) {
-    super(message2);
-    this.name = "UnanswerableSQLError";
-  }
-  static isInstance(error) {
-    return error instanceof _UnanswerableSQLError;
-  }
-};
 async function toSql(options) {
   const { maxRetries = 3 } = options;
   return withRetry(
@@ -1088,20 +1192,38 @@ async function toSql(options) {
       context.set(
         persona3({
           name: "Freya",
-          role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
-          objective: "Translate natural language questions into precise, efficient SQL queries"
+          role: SQL_AGENT_ROLE,
+          objective: SQL_AGENT_OBJECTIVE
+          // role: `You are a data science expert that provides well-reasoned and detailed responses.`,
+          // objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
         }),
+        ...SQL_AGENT_POLICIES,
         ...options.fragments
       );
       if (errors.length) {
+        const lastError = errors.at(-1);
         context.set(
-          user2(options.input),
-          user2(
-            `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
+          user2(dedent2`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`),
+          UnanswerableSQLError.isInstance(lastError) ? user2(
+            `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
+          ) : user2(
+            `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
           )
         );
       } else {
-        context.set(user2(options.input));
+        context.set(
+          user2(dedent2`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`)
+        );
       }
       const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
       const baseModel = options.model ?? groq2("openai/gpt-oss-20b");
@@ -1127,19 +1249,45 @@ async function toSql(options) {
         })
       });
       const { result: output } = await sqlOutput.generate();
+      const finalizeSql = async (rawSql) => {
+        const sql = options.adapter.format(extractSql(rawSql));
+        const validationError = await options.adapter.validate(sql);
+        if (validationError) {
+          throw new SQLValidationError(validationError);
+        }
+        return {
+          attempts,
+          sql,
+          errors: errors.length ? errors.map(formatErrorMessage) : void 0
+        };
+      };
       if ("error" in output) {
-        throw new UnanswerableSQLError(output.error);
-      }
-      const sql = options.adapter.format(extractSql(output.sql));
-      const validationError = await options.adapter.validate(sql);
-      if (validationError) {
-        throw new SQLValidationError(validationError);
+        context.set(
+          user2(
+            "<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
+          )
+        );
+        const forcedSqlOutput = structuredOutput2({
+          model,
+          context,
+          schema: z4.object({
+            sql: z4.string().describe(
+              "Best-effort SQL query that answers the core intent using only available schema entities."
+            ),
+            reasoning: z4.string().describe("Reasoning steps for best-effort schema mapping.")
+          })
+        });
+        try {
+          const forced = await forcedSqlOutput.generate();
+          return await finalizeSql(forced.sql);
+        } catch (error) {
+          if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
+            throw error;
+          }
+          throw new UnanswerableSQLError(output.error);
+        }
       }
-      return {
-        attempts,
-        sql,
-        errors: errors.length ? errors.map(formatErrorMessage) : void 0
-      };
+      return await finalizeSql(output.sql);
     },
     { retries: maxRetries - 1 }
   );
@@ -1202,9 +1350,6 @@ async function withRetry(computation, options = { retries: 3 }) {
         return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
       },
       onFailedAttempt(context) {
-        console.log(
-          `Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
-        );
         errors.push(context.error);
       }
     }
@@ -1213,7 +1358,7 @@ async function withRetry(computation, options = { retries: 3 }) {
 // packages/text2sql/src/lib/agents/suggestions.agents.ts
 import { groq as groq3 } from "@ai-sdk/groq";
-import dedent2 from "dedent";
+import dedent3 from "dedent";
 import z5 from "zod";
 import { agent, thirdPersonPrompt } from "@deepagents/agent";
 var suggestionsAgent = agent({
@@ -1229,7 +1374,7 @@ var suggestionsAgent = agent({
     ).min(1).max(5).describe("A set of up to two advanced question + SQL pairs.")
   }),
   prompt: (state) => {
-    return dedent2`
+    return dedent3`
       ${thirdPersonPrompt()}
       <identity>
@@ -4195,10 +4340,10 @@ import {
   clarification,
   example,
   explain,
-  fragment as fragment2,
+  fragment as fragment3,
   guardrail,
   hint as hint2,
-  policy,
+  policy as policy2,
   principle,
   quirk,
   role,
@@ -4210,7 +4355,7 @@ function reasoningFramework() {
     role(
       "You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses."
     ),
-    fragment2(
+    fragment3(
       "meta-cognitive-reasoning-framework",
       hint2(
         "Before taking any action (either tool calls *or* responses to the user), you must proactively, methodically, and independently plan and reason about:"
@@ -4220,19 +4365,19 @@ function reasoningFramework() {
         title: "Logical dependencies and constraints",
         description: "Analyze the intended action against the following factors. Resolve conflicts in order of importance:",
         policies: [
-          policy({
+          policy2({
             rule: "Policy-based rules, mandatory prerequisites, and constraints."
           }),
-          policy({
+          policy2({
             rule: "Order of operations: Ensure taking an action does not prevent a subsequent necessary action.",
             policies: [
               "The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task."
             ]
           }),
-          policy({
+          policy2({
             rule: "Other prerequisites (information and/or actions needed)."
           }),
-          policy({ rule: "Explicit user constraints or preferences." })
+          policy2({ rule: "Explicit user constraints or preferences." })
         ]
       }),
       // 2) Risk assessment
@@ -4285,17 +4430,17 @@ function reasoningFramework() {
         title: "Completeness",
         description: "Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.",
         policies: [
-          policy({
+          policy2({
             rule: "Resolve conflicts using the order of importance in #1."
           }),
-          policy({
+          policy2({
             rule: "Avoid premature conclusions: There may be multiple relevant options for a given situation.",
             policies: [
               "To check for whether an option is relevant, reason about all information sources from #5.",
               "You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking."
             ]
           }),
-          policy({
+          policy2({
             rule: "Review applicable sources of information from #5 to confirm which are relevant to the current state."
           })
         ]
@@ -4327,31 +4472,31 @@ function guidelines(options = {}) {
     // Include the meta-cognitive reasoning framework
     ...reasoningFramework(),
     // Prerequisite policies (must do X before Y)
-    fragment2(
+    fragment3(
       "prerequisite_policies",
-      policy({
+      policy2({
         rule: "YOU MUST inspect schema structure and available tables",
         before: "generating ANY SQL query",
         reason: "NEVER generate SQL without knowing valid tables, columns, and relationships"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST resolve ambiguous business terms with the user",
         before: "making ANY assumptions about terminology meaning",
         reason: "NEVER guess domain-specific language\u2014ask for clarification"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST validate SQL syntax",
         before: "executing ANY query against the database",
         reason: "NEVER execute unvalidated queries"
       }),
-      policy({
+      policy2({
         rule: "YOU MUST complete ALL reasoning steps",
         before: "taking ANY tool call or response action",
         reason: "Once an action is taken, it CANNOT be undone. NO EXCEPTIONS."
       })
     ),
     // Few-shot: Applying reasoning principles
-    fragment2(
+    fragment3(
       "reasoning-examples",
       example({
         question: "Show me sales last month",
@@ -4391,7 +4536,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       })
     ),
     // Schema adherence - consolidated into clear rules
-    fragment2(
+    fragment3(
       "schema_adherence",
       hint2(
         "Use only tables and columns from the schema. For unspecified columns, use SELECT *. When showing related items, include IDs and requested details."
@@ -4400,7 +4545,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
         '"Show" means list items; "count" or "total" means aggregate. Use canonical values verbatim for filtering.'
       )
     ),
-    fragment2(
+    fragment3(
       "Column statistics",
       explain({
         concept: "nDistinct in column stats",
@@ -4421,7 +4566,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       "Use JOINs based on schema relationships. Favor PK/indexed columns; follow relationship metadata for direction and cardinality."
     ),
     // Aggregations - explain the concepts
-    fragment2(
+    fragment3(
       "Aggregations",
       hint2(
         "Apply COUNT, SUM, AVG when the question implies summarization. Use window functions for ranking, running totals, or row comparisons."
@@ -4433,7 +4578,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       })
     ),
     // Query semantics - explain concepts and document quirks
-    fragment2(
+    fragment3(
       "Query interpretation",
       explain({
         concept: "threshold language",
@@ -4461,7 +4606,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       prefer: "Concise, business-friendly summaries with key comparisons and helpful follow-ups."
     }),
     // Safety guardrails - consolidated
-    fragment2(
+    fragment3(
       "Query safety",
       guardrail({
         rule: "Generate only valid, executable SELECT/WITH statements.",
@@ -4547,7 +4692,7 @@ Action: Ask user: "Top by what metric\u2014total revenue, number of orders, or m
       ],
       notes: "If reference is ambiguous, ask which previous result or entity the user means."
     }),
-    fragment2(
+    fragment3(
       "Bash tool usage",
       workflow({
         task: "Query execution",