npm - @deepagents/text2sql - Versions diffs - 0.18.0 → 0.20.0 - Mend

@deepagents/text2sql 0.18.0 → 0.20.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (16)

package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +526 -71
package/dist/index.js.map +4 -4
package/dist/lib/agents/exceptions.d.ts +20 -0
package/dist/lib/agents/exceptions.d.ts.map +1 -0
package/dist/lib/agents/result-tools.d.ts +15 -10
package/dist/lib/agents/result-tools.d.ts.map +1 -1
package/dist/lib/agents/sql.agent.d.ts +2 -21
package/dist/lib/agents/sql.agent.d.ts.map +1 -1
package/dist/lib/sql.d.ts.map +1 -1
package/dist/lib/synthesis/index.js +227 -87
package/dist/lib/synthesis/index.js.map +4 -4
package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
package/package.json +8 -8

package/dist/lib/synthesis/index.js CHANGED Viewed

@@ -714,6 +714,32 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
 // packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
 import pLimit from "p-limit";
+// packages/text2sql/src/lib/agents/exceptions.ts
+var sqlValidationMarker = Symbol("SQLValidationError");
+var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
+var SQLValidationError = class _SQLValidationError extends Error {
+  [sqlValidationMarker];
+  constructor(message) {
+    super(message);
+    this.name = "SQLValidationError";
+    this[sqlValidationMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
+  }
+};
+var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
+  [unanswerableSqlMarker];
+  constructor(message) {
+    super(message);
+    this.name = "UnanswerableSQLError";
+    this[unanswerableSqlMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
+  }
+};
 // packages/text2sql/src/lib/agents/question.agent.ts
 import { groq as groq4 } from "@ai-sdk/groq";
 import dedent4 from "dedent";
@@ -838,42 +864,120 @@ import {
   defaultSettingsMiddleware,
   wrapLanguageModel
 } from "ai";
+import dedent5 from "dedent";
 import pRetry from "p-retry";
 import z5 from "zod";
 import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine5,
   InMemoryContextStore as InMemoryContextStore5,
+  fragment as fragment5,
   persona as persona5,
+  policy,
   structuredOutput as structuredOutput5,
   user as user5
 } from "@deepagents/context";
 var RETRY_TEMPERATURES = [0, 0.2, 0.3];
+var SQL_AGENT_ROLE = "Expert SQL query generator.";
+var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
+var SQL_AGENT_POLICIES = [
+  fragment5(
+    "schema_mapping",
+    policy({
+      rule: "Translate natural language into precise SQL grounded in available schema entities."
+    })
+    // policy({
+    //   rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
+    // }),
+    // policy({
+    //   rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
+    // }),
+    // policy({
+    //   rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
+    // }),
+    // policy({
+    //   rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
+    // }),
+    // policy({
+    //   rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
+    // }),
+    // policy({
+    //   rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
+    // }),
+    // policy({
+    //   rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
+    // }),
+    // policy({
+    //   rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
+    // }),
+    // hint('Preserve schema spelling exactly, including typos in column names.'),
+  )
+  // fragment(
+  //   'unanswerable_gate',
+  //   workflow({
+  //     task: 'Unanswerable decision',
+  //     steps: [
+  //       'Identify the core intent (metric/projection and required filters).',
+  //       'Attempt schema-grounded mapping for the core intent before considering error.',
+  //       'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
+  //       'Return unanswerable only when required information cannot be mapped to any available table or column.',
+  //     ],
+  //   }),
+  //   policy({
+  //     rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
+  //   }),
+  // ),
+  // fragment(
+  //   'query_shape_preferences',
+  //   hint(
+  //     'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
+  //   ),
+  //   hint(
+  //     'Prefer direct joins over dropping join constraints or using weaker alternatives.',
+  //   ),
+  //   hint('Use DISTINCT only when uniqueness is explicitly requested.'),
+  //   hint(
+  //     'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
+  //   ),
+  //   hint(
+  //     'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
+  //   ),
+  //   hint(
+  //     'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
+  //   ),
+  //   hint(
+  //     'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
+  //   ),
+  //   hint(
+  //     'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
+  //   ),
+  //   hint(
+  //     'Use equality predicates for exact values unless the question asks for pattern matching.',
+  //   ),
+  //   hint(
+  //     'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
+  //   ),
+  // ),
+  // fragment(
+  //   'sql_minimality',
+  //   guardrail({
+  //     rule: 'Never hallucinate tables or columns.',
+  //     reason: 'Schema fidelity is required.',
+  //     action: 'Use only available schema entities.',
+  //   }),
+  //   guardrail({
+  //     rule: 'Prefer the minimal query over transformed expressions.',
+  //     reason:
+  //       'Unnecessary transformations reduce correctness and add avoidable complexity.',
+  //     action:
+  //       'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
+  //   }),
+  // ),
+];
 function extractSql(output) {
   const match = output.match(/```sql\n?([\s\S]*?)```/);
   return match ? match[1].trim() : output.trim();
 }
-var marker = Symbol("SQLValidationError");
-var SQLValidationError = class _SQLValidationError extends Error {
-  [marker];
-  constructor(message) {
-    super(message);
-    this.name = "SQLValidationError";
-    this[marker] = true;
-  }
-  static isInstance(error) {
-    return error instanceof _SQLValidationError && error[marker] === true;
-  }
-};
-var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
-  constructor(message) {
-    super(message);
-    this.name = "UnanswerableSQLError";
-  }
-  static isInstance(error) {
-    return error instanceof _UnanswerableSQLError;
-  }
-};
 async function toSql(options) {
   const { maxRetries = 3 } = options;
   return withRetry(
@@ -886,21 +990,38 @@ async function toSql(options) {
       context.set(
         persona5({
           name: "Freya",
-          role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema.",
-          objective: "Translate natural language questions into precise, efficient SQL queries"
+          role: SQL_AGENT_ROLE,
+          objective: SQL_AGENT_OBJECTIVE
+          // role: `You are a data science expert that provides well-reasoned and detailed responses.`,
+          // objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
         }),
-        ...options.instructions,
-        ...options.schemaFragments
+        ...SQL_AGENT_POLICIES,
+        ...options.fragments
       );
       if (errors.length) {
+        const lastError = errors.at(-1);
         context.set(
-          user5(options.input),
-          user5(
-            `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
+          user5(dedent5`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`),
+          UnanswerableSQLError.isInstance(lastError) ? user5(
+            `<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
+          ) : user5(
+            `<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
           )
         );
       } else {
-        context.set(user5(options.input));
+        context.set(
+          user5(dedent5`
+            Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
+Given the question, the evidence and the database schema, return the SQL script that addresses the question.
+Question: ${options.input}
+`)
+        );
       }
       const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
       const baseModel = options.model ?? groq5("openai/gpt-oss-20b");
@@ -926,19 +1047,45 @@ async function toSql(options) {
         })
       });
       const { result: output } = await sqlOutput.generate();
+      const finalizeSql = async (rawSql) => {
+        const sql = options.adapter.format(extractSql(rawSql));
+        const validationError = await options.adapter.validate(sql);
+        if (validationError) {
+          throw new SQLValidationError(validationError);
+        }
+        return {
+          attempts,
+          sql,
+          errors: errors.length ? errors.map(formatErrorMessage) : void 0
+        };
+      };
       if ("error" in output) {
-        throw new UnanswerableSQLError(output.error);
-      }
-      const sql = options.adapter.format(extractSql(output.sql));
-      const validationError = await options.adapter.validate(sql);
-      if (validationError) {
-        throw new SQLValidationError(validationError);
+        context.set(
+          user5(
+            "<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
+          )
+        );
+        const forcedSqlOutput = structuredOutput5({
+          model,
+          context,
+          schema: z5.object({
+            sql: z5.string().describe(
+              "Best-effort SQL query that answers the core intent using only available schema entities."
+            ),
+            reasoning: z5.string().describe("Reasoning steps for best-effort schema mapping.")
+          })
+        });
+        try {
+          const forced = await forcedSqlOutput.generate();
+          return await finalizeSql(forced.sql);
+        } catch (error) {
+          if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
+            throw error;
+          }
+          throw new UnanswerableSQLError(output.error);
+        }
       }
-      return {
-        attempts,
-        sql,
-        errors: errors.length ? errors.map(formatErrorMessage) : void 0
-      };
+      return await finalizeSql(output.sql);
     },
     { retries: maxRetries - 1 }
   );
@@ -1001,9 +1148,6 @@ async function withRetry(computation, options = { retries: 3 }) {
         return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
       },
       onFailedAttempt(context) {
-        console.log(
-          `Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
-        );
         errors.push(context.error);
       }
     }
@@ -1076,9 +1220,7 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
             return await toSql({
               input: question,
               adapter: this.adapter,
-              schemaFragments: [],
-              // Placeholder - needs to pass actual fragments
-              instructions: this.options.teachings ?? [],
+              fragments: this.options.teachings ?? [],
               model: this.options.model
             });
           } catch (error) {
@@ -1107,14 +1249,14 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
 // packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
 import { groq as groq6 } from "@ai-sdk/groq";
-import dedent5 from "dedent";
+import dedent6 from "dedent";
 import pLimit2 from "p-limit";
 import z6 from "zod";
 import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine6,
   InMemoryContextStore as InMemoryContextStore6,
-  fragment as fragment5,
+  fragment as fragment6,
   guardrail as guardrail2,
   persona as personaFragment,
   structuredOutput as structuredOutput6,
@@ -1166,7 +1308,7 @@ async function paraphraseQuestion(params) {
     chatId: `paraphraser-${crypto.randomUUID()}`,
     userId: "system"
   });
-  const personaInstruction = params.persona ? dedent5`
+  const personaInstruction = params.persona ? dedent6`
         <persona role="${params.persona.role}">
           ${params.persona.perspective}
@@ -1174,7 +1316,7 @@ async function paraphraseQuestion(params) {
           Use their vocabulary, priorities, and framing style.
         </persona>
       ` : "";
-  const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent5`
+  const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
         <communication_styles>
           Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
@@ -1190,17 +1332,17 @@ async function paraphraseQuestion(params) {
       role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
       objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
     }),
-    fragment5("original_question", params.question),
-    fragment5(
+    fragment6("original_question", params.question),
+    fragment6(
       "reference_sql",
       params.sql,
       "This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
     ),
-    ...personaInstruction ? [fragment5("persona", personaInstruction)] : [],
-    ...styleInstruction ? [fragment5("communication_styles", styleInstruction)] : [],
-    fragment5(
+    ...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
+    ...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
+    fragment6(
       "task",
-      dedent5`
+      dedent6`
         Generate exactly ${params.count} paraphrased versions of the original question.
         Requirements:
@@ -1277,7 +1419,7 @@ var BreadthEvolver = class extends PairProducer {
 // packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
 import { groq as groq7 } from "@ai-sdk/groq";
 import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
-import dedent6 from "dedent";
+import dedent7 from "dedent";
 import pLimit3 from "p-limit";
 import pRetry2 from "p-retry";
 import z7 from "zod";
@@ -1285,14 +1427,14 @@ import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine7,
   InMemoryContextStore as InMemoryContextStore7,
-  fragment as fragment6,
+  fragment as fragment7,
   guardrail as guardrail3,
   persona as persona6,
   structuredOutput as structuredOutput7,
   user as user7
 } from "@deepagents/context";
 var techniqueInstructions = {
-  "add-aggregation": dedent6`
+  "add-aggregation": dedent7`
     Add aggregation requirements to the question.
     Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
     Examples:
@@ -1300,7 +1442,7 @@ var techniqueInstructions = {
     - "List products" → "What is the average price per category?"
     - "Get employees" → "How many employees are in each department?"
   `,
-  "add-filter": dedent6`
+  "add-filter": dedent7`
     Add filtering conditions to the question.
     Transform it to require WHERE clauses with specific conditions.
     Examples:
@@ -1308,7 +1450,7 @@ var techniqueInstructions = {
     - "List customers" → "List customers who have made more than 5 purchases"
     - "Get products" → "Get products with price above $100"
   `,
-  "add-join": dedent6`
+  "add-join": dedent7`
     Add requirements that need data from related tables.
     Transform it to require JOIN operations between multiple tables.
     Examples:
@@ -1316,7 +1458,7 @@ var techniqueInstructions = {
     - "List products" → "List products with their supplier information"
     - "Get employees" → "Get employees with their department and manager names"
   `,
-  "add-reasoning": dedent6`
+  "add-reasoning": dedent7`
     Add multi-step reasoning requirements.
     Transform it to require logical deduction, comparisons, or derived calculations.
     Examples:
@@ -1324,7 +1466,7 @@ var techniqueInstructions = {
     - "List products" → "Which products are underperforming compared to their category average?"
     - "Get revenue" → "Which month had the highest growth compared to the previous month?"
   `,
-  hypothetical: dedent6`
+  hypothetical: dedent7`
     Add a hypothetical or speculative scenario.
     Transform it to require applying calculations or projections.
     Examples:
@@ -1348,21 +1490,21 @@ async function evolveQuestion(params) {
       role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
       objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
     }),
-    fragment6("original_question", params.question),
-    fragment6(
+    fragment7("original_question", params.question),
+    fragment7(
       "original_sql",
       params.sql,
       "(This shows what the original question required)"
     ),
-    fragment6("database_schema", params.schema),
-    fragment6(
+    fragment7("database_schema", params.schema),
+    fragment7(
       "technique",
       { name: params.technique },
       params.techniqueInstruction
     ),
-    fragment6(
+    fragment7(
       "task",
-      dedent6`
+      dedent7`
         Evolve the original question using the "${params.technique}" technique.
         Requirements:
@@ -1455,9 +1597,7 @@ var DepthEvolver = class extends PairProducer {
       const sqlResult = await toSql({
         input: evolvedQuestion,
         adapter: this.adapter,
-        schemaFragments: [],
-        // Placeholder - needs to pass actual fragments
-        instructions: [],
+        fragments: [],
         model: this.options?.model
       });
       return {
@@ -1507,14 +1647,14 @@ async function withRetry2(computation) {
 // packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
 import { groq as groq8 } from "@ai-sdk/groq";
-import dedent7 from "dedent";
+import dedent8 from "dedent";
 import z8 from "zod";
 import "@deepagents/agent";
 import {
   ContextEngine as ContextEngine8,
   InMemoryContextStore as InMemoryContextStore8,
   XmlRenderer,
-  fragment as fragment7,
+  fragment as fragment8,
   guardrail as guardrail4,
   persona as personaFragment2,
   structuredOutput as structuredOutput8,
@@ -1547,10 +1687,10 @@ async function generatePersonas(schemaFragments, options) {
       role: "You are an expert at understanding database schemas and inferring who would use them.",
       objective: "Generate realistic personas representing users who would query this database"
     }),
-    fragment7("database_schema", schema),
-    fragment7(
+    fragment8("database_schema", schema),
+    fragment8(
       "task",
-      dedent7`
+      dedent8`
         Analyze the database schema and generate realistic personas representing
         the different types of users who would query this database.
@@ -1581,9 +1721,9 @@ async function generatePersonas(schemaFragments, options) {
         - Styles should match how this persona would naturally communicate
       `
     ),
-    fragment7(
+    fragment8(
       "example",
-      dedent7`
+      dedent8`
         For an e-commerce schema with orders, customers, products tables:
         {
@@ -1626,7 +1766,7 @@ import { XmlRenderer as XmlRenderer2 } from "@deepagents/context";
 // packages/text2sql/src/lib/agents/teachables.agent.ts
 import { groq as groq9 } from "@ai-sdk/groq";
-import dedent8 from "dedent";
+import dedent9 from "dedent";
 import z9 from "zod";
 import "@deepagents/agent";
 import {
@@ -1636,7 +1776,7 @@ import {
   clarification,
   example,
   explain,
-  fragment as fragment8,
+  fragment as fragment9,
   guardrail as guardrail5,
   hint,
   persona as persona7,
@@ -1710,11 +1850,11 @@ async function toTeachings(input, options) {
       role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
       objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
     }),
-    fragment8("database_schema", input.schema),
-    ...input.context ? [fragment8("additional_context", input.context)] : [],
-    fragment8(
+    fragment9("database_schema", input.schema),
+    ...input.context ? [fragment9("additional_context", input.context)] : [],
+    fragment9(
       "output_structure",
-      dedent8`
+      dedent9`
         Output a JSON object with these optional arrays (include only relevant ones):
         - terms: [{ name: string, definition: string }] - Domain terminology
         - hints: [{ text: string }] - Helpful SQL generation hints
@@ -1728,9 +1868,9 @@ async function toTeachings(input, options) {
         - analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
       `
     ),
-    fragment8(
+    fragment9(
       "task",
-      dedent8`
+      dedent9`
         1. Analyze the schema to infer domain, relationships, and sensitive columns.
         2. Generate 3-10 fragments total across all categories, prioritizing:
            - guardrails for PII columns (email, ssn, phone, etc)