npm - @deepagents/text2sql - Versions diffs - 0.6.0 → 0.8.0 - Mend

@deepagents/text2sql 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/dist/lib/synthesis/index.js CHANGED Viewed

@@ -706,7 +706,7 @@ import dedent4 from "dedent";
 import z4 from "zod";
 import { agent as agent4, generate as generate5, user as user5 } from "@deepagents/agent";
 var complexityInstructions = {
-  low: dedent4`
+  simple: dedent4`
     Generate simple questions that require:
     - Basic SELECT with single table
     - Simple WHERE clauses with one condition
@@ -714,7 +714,7 @@ var complexityInstructions = {
     - No joins required
     Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
   `,
-  medium: dedent4`
+  moderate: dedent4`
     Generate moderate questions that require:
     - JOINs between 2-3 tables
     - Multiple WHERE conditions (AND/OR)
@@ -723,7 +723,7 @@ var complexityInstructions = {
     - Basic subqueries
     Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
   `,
-  hard: dedent4`
+  complex: dedent4`
     Generate complex questions that require:
     - Multiple JOINs (3+ tables)
     - Nested subqueries or CTEs
@@ -732,13 +732,14 @@ var complexityInstructions = {
     - Date/time calculations
     Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
   `,
-  window: dedent4`
-    Generate advanced questions that require window functions:
-    - ROW_NUMBER, RANK, DENSE_RANK
+  "high complex": dedent4`
+    Generate highly complex questions that require advanced SQL features:
+    - Window functions (ROW_NUMBER, RANK, DENSE_RANK)
     - LAG, LEAD for comparisons
     - Running totals (SUM OVER)
     - Moving averages
     - PARTITION BY clauses
+    - Complex CTEs with multiple levels
     Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
   `
 };
@@ -756,7 +757,7 @@ var questionGeneratorAgent = agent4({
   }),
   prompt: (state) => {
     const count = state?.count;
-    const complexity = state?.complexity ?? "medium";
+    const complexity = state?.complexity ?? "moderate";
     return dedent4`
       <identity>
         You are a synthetic data generator specializing in creating realistic natural language questions
@@ -793,19 +794,40 @@ async function generateQuestions(params) {
   const { introspection, complexity, count, prompt, model } = params;
   const agentInstance = model ? questionGeneratorAgent.clone({ model }) : questionGeneratorAgent;
   const userPrompt = prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`;
-  const { experimental_output } = await generate5(agentInstance, [user5(userPrompt)], {
-    introspection,
-    complexity,
-    count
-  });
+  const { experimental_output } = await generate5(
+    agentInstance,
+    [user5(userPrompt)],
+    {
+      introspection,
+      complexity,
+      count
+    }
+  );
   return { questions: experimental_output.questions };
 }
 // packages/text2sql/src/lib/agents/sql.agent.ts
 import { groq as groq5 } from "@ai-sdk/groq";
-import { defaultSettingsMiddleware as defaultSettingsMiddleware2, wrapLanguageModel as wrapLanguageModel2 } from "ai";
+import {
+  APICallError,
+  JSONParseError,
+  NoContentGeneratedError,
+  NoObjectGeneratedError,
+  NoOutputGeneratedError,
+  TypeValidationError,
+  defaultSettingsMiddleware as defaultSettingsMiddleware2,
+  wrapLanguageModel as wrapLanguageModel2
+} from "ai";
+import { Console } from "node:console";
+import { createWriteStream } from "node:fs";
+import pRetry from "p-retry";
 import z5 from "zod";
-import { agent as agent5, generate as generate6, user as user6 } from "@deepagents/agent";
+import {
+  agent as agent5,
+  generate as generate6,
+  toOutput,
+  user as user6
+} from "@deepagents/agent";
 // packages/text2sql/src/lib/teach/xml.ts
 function wrapBlock(tag, children) {
@@ -1174,6 +1196,11 @@ function toTeachables(generated) {
 }
 // packages/text2sql/src/lib/agents/sql.agent.ts
+var logger = new Console({
+  stdout: createWriteStream("./sql-agent.log", { flags: "a" }),
+  stderr: createWriteStream("./sql-agent-error.log", { flags: "a" }),
+  inspectOptions: { depth: null }
+});
 var RETRY_TEMPERATURES = [0, 0.2, 0.3];
 var sqlQueryAgent = agent5({
   name: "text2sql",
@@ -1201,107 +1228,131 @@ function extractSql(output) {
   const match = output.match(/```sql\n?([\s\S]*?)```/);
   return match ? match[1].trim() : output.trim();
 }
-async function generateSql(params) {
-  const {
-    input,
-    model,
-    temperature,
-    introspection,
-    instructions,
-    previousError
-  } = params;
-  const agentInstance = sqlQueryAgent.clone({
-    model: wrapLanguageModel2({
-      model,
-      middleware: defaultSettingsMiddleware2({
-        settings: { temperature, topP: 1 }
-      })
-    })
-  });
-  const messages = previousError ? [
-    user6(input),
-    user6(
-      `<validation_error>Your previous SQL query had the following error: ${previousError}. Please fix the query.</validation_error>`
-    )
-  ] : [user6(input)];
-  try {
-    const { experimental_output: output } = await generate6(
-      agentInstance,
-      messages,
-      {
-        teachings: toInstructions(
-          "instructions",
-          persona({
-            name: "Freya",
-            role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema."
-          }),
-          ...instructions
-        ),
-        introspection
-      }
-    );
-    if ("error" in output) {
-      return { success: false, error: output.error, isUnanswerable: true };
-    }
-    return { success: true, sql: extractSql(output.sql) };
-  } catch (error) {
-    if (error instanceof Error && (error.message.includes("Failed to validate JSON") || error.message.includes("response did not match schema"))) {
-      return {
-        success: false,
-        error: `Schema validation failed: ${error.message}`
-      };
-    }
-    throw error;
+var marker = Symbol("SQLValidationError");
+var SQLValidationError = class _SQLValidationError extends Error {
+  [marker];
+  constructor(message) {
+    super(message);
+    this.name = "SQLValidationError";
+    this[marker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _SQLValidationError && error[marker] === true;
+  }
+};
+var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
+  constructor(message) {
+    super(message);
+    this.name = "UnanswerableSQLError";
+  }
+  static isInstance(error) {
+    return error instanceof _UnanswerableSQLError;
   }
-}
-var sqlGenerators = {
-  generateSql
 };
-async function generateAndValidate(options, temperature, previousError) {
-  const result = await sqlGenerators.generateSql({
-    input: options.input,
-    model: options.model ?? sqlQueryAgent.model,
-    temperature,
-    introspection: options.introspection,
-    instructions: options.instructions,
-    previousError
-  });
-  if (!result.success) {
-    return {
-      ok: false,
-      error: result.error,
-      isUnanswerable: result.isUnanswerable
-    };
-  }
-  const validationError = await options.adapter.validate(result.sql);
-  if (validationError) {
-    return { ok: false, error: validationError };
-  }
-  return { ok: true, sql: result.sql };
-}
 async function toSql(options) {
   const { maxRetries = 3 } = options;
-  const errors = [];
-  for (let attempt = 1; attempt <= maxRetries; attempt++) {
-    const temperature = RETRY_TEMPERATURES[attempt - 1] ?? 0.3;
-    const result = await generateAndValidate(
-      options,
-      temperature,
-      errors.at(-1)
-    );
-    if (result.ok) {
+  return withRetry(
+    async (attemptNumber, errors, attempts) => {
+      const agentInstance = sqlQueryAgent.clone({
+        model: wrapLanguageModel2({
+          model: options.model ?? sqlQueryAgent.model,
+          middleware: defaultSettingsMiddleware2({
+            settings: {
+              temperature: RETRY_TEMPERATURES[attemptNumber - 1] ?? 0.3,
+              topP: 1
+            }
+          })
+        })
+      });
+      const messages = errors.length ? [
+        user6(options.input),
+        user6(
+          `<validation_error>Your previous SQL query had the following error: ${errors.at(-1)?.message}. Please fix the query.</validation_error>`
+        )
+      ] : [user6(options.input)];
+      const output = await toOutput(
+        generate6(agentInstance, messages, {
+          introspection: options.introspection,
+          teachings: toInstructions(
+            "instructions",
+            persona({
+              name: "Freya",
+              role: "You are an expert SQL query generator. You translate natural language questions into precise, efficient SQL queries based on the provided database schema."
+            }),
+            ...options.instructions
+          )
+        })
+      );
+      if ("error" in output) {
+        throw new UnanswerableSQLError(output.error);
+      }
+      const sql = extractSql(output.sql);
+      const validationError = await options.adapter.validate(sql);
+      if (validationError) {
+        throw new SQLValidationError(validationError);
+      }
       return {
-        sql: result.sql,
-        attempts: attempt,
-        errors: errors.length ? errors : void 0
+        attempts,
+        sql,
+        errors: errors.length ? errors.map(formatErrorMessage) : void 0
       };
+    },
+    { retries: maxRetries - 1 }
+  );
+}
+function formatErrorMessage(error) {
+  if (APICallError.isInstance(error)) {
+    if (error.message.startsWith("Failed to validate JSON")) {
+      return `Schema validation failed: ${error.message}`;
     }
-    if (result.isUnanswerable) {
-      return { sql: "", attempts: attempt, errors: [result.error] };
-    }
-    errors.push(result.error);
+    return error.message;
+  }
+  if (SQLValidationError.isInstance(error)) {
+    return `SQL Validation Error: ${error.message}`;
   }
-  return { sql: "", attempts: maxRetries, errors };
+  return error.message;
+}
+async function withRetry(computation, options = { retries: 3 }) {
+  const errors = [];
+  let attempts = 0;
+  return pRetry(
+    (attemptNumber) => {
+      return computation(attemptNumber, errors, ++attempts);
+    },
+    {
+      retries: options.retries,
+      shouldRetry: (context2) => {
+        if (UnanswerableSQLError.isInstance(context2.error)) {
+          return false;
+        }
+        if (SQLValidationError.isInstance(context2.error)) {
+          return true;
+        }
+        console.log({
+          NoObjectGeneratedError: NoObjectGeneratedError.isInstance(
+            context2.error
+          ),
+          NoOutputGeneratedError: NoOutputGeneratedError.isInstance(
+            context2.error
+          ),
+          APICallError: APICallError.isInstance(context2.error),
+          JSONParseError: JSONParseError.isInstance(context2.error),
+          TypeValidationError: TypeValidationError.isInstance(context2.error),
+          NoContentGeneratedError: NoContentGeneratedError.isInstance(
+            context2.error
+          )
+        });
+        return APICallError.isInstance(context2.error) || JSONParseError.isInstance(context2.error) || TypeValidationError.isInstance(context2.error) || NoObjectGeneratedError.isInstance(context2.error) || NoOutputGeneratedError.isInstance(context2.error) || NoContentGeneratedError.isInstance(context2.error);
+      },
+      onFailedAttempt(context2) {
+        logger.error(`toSQL`, context2.error);
+        console.log(
+          `Attempt ${context2.attemptNumber} failed. There are ${context2.retriesLeft} retries left.`
+        );
+        errors.push(context2.error);
+      }
+    }
+  );
 }
 // packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
@@ -1314,7 +1365,7 @@ var SchemaSynthesizer = class extends PairProducer {
     super();
     this.adapter = adapter;
     this.options = options;
-    this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "medium"];
+    this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "moderate"];
     this.#personas = this.options.personas ?? [void 0];
     this.#limit = pLimit(this.options.concurrency ?? 5);
   }
@@ -1365,15 +1416,28 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
     );
     const pairs = await Promise.all(
       questions.map(async (question) => {
-        const result = await this.#limit(
-          () => toSql({
-            input: question,
-            adapter: this.adapter,
-            introspection,
-            instructions: this.options.teachings ?? [],
-            model: this.options.model
-          })
-        );
+        const result = await this.#limit(async () => {
+          try {
+            return await toSql({
+              input: question,
+              adapter: this.adapter,
+              introspection,
+              instructions: this.options.teachings ?? [],
+              model: this.options.model
+            });
+          } catch (error) {
+            if (UnanswerableSQLError.isInstance(error)) {
+              return {
+                attempts: 0,
+                sql: "",
+                errors: [
+                  `Cannot answer the question ${question} because ${error.message}`
+                ]
+              };
+            }
+            throw error;
+          }
+        });
         return {
           question,
           sql: result.sql,
@@ -1391,31 +1455,45 @@ import { defaultSettingsMiddleware as defaultSettingsMiddleware3, wrapLanguageMo
 import dedent5 from "dedent";
 import pLimit2 from "p-limit";
 import z6 from "zod";
-import { agent as agent6, generate as generate7, user as user7 } from "@deepagents/agent";
+import {
+  agent as agent6,
+  generate as generate7,
+  toOutput as toOutput2,
+  user as user7
+} from "@deepagents/agent";
 // packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
-var styleInstructions = {
-  formal: "Use professional business language, complete sentences, no slang",
-  colloquial: "Use casual everyday speech, contractions, informal tone",
-  imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
-  interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
-  descriptive: "Use detailed, verbose phrasing with extra context",
-  concise: "Use minimal words, telegram-style brevity",
-  vague: "Be intentionally ambiguous, use hedging language",
-  metaphorical: "Use figurative language, analogies, creative phrasing",
-  conversational: "Chat-like tone, as if talking to a colleague"
-};
 var ALL_STYLES = [
   "formal",
+  // Professional business language
   "colloquial",
+  // Casual everyday speech
   "imperative",
+  // Commands: "Show me...", "Get..."
   "interrogative",
+  // Questions: "What is...", "How many..."
   "descriptive",
+  // Verbose, detailed
   "concise",
+  // Brief, minimal
   "vague",
+  // Ambiguous, hedging
   "metaphorical",
+  // Figurative language
   "conversational"
+  // Chat-like
 ];
+var styleInstructions = {
+  formal: "Use professional business language, complete sentences, no slang",
+  colloquial: "Use casual everyday speech, contractions, informal tone",
+  imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
+  interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
+  descriptive: "Use detailed, verbose phrasing with extra context",
+  concise: "Use minimal words, telegram-style brevity",
+  vague: "Be intentionally ambiguous, use hedging language",
+  metaphorical: "Use figurative language, analogies, creative phrasing",
+  conversational: "Chat-like tone, as if talking to a colleague"
+};
 // packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
 var paraphraserAgent = agent6({
@@ -1514,21 +1592,23 @@ var BreadthEvolver = class extends PairProducer {
     for await (const chunk of this.from(this.source)) {
       const tasks = chunk.map(
         (pair) => this.#limit(async () => {
-          const { experimental_output } = await generate7(
-            paraphraserAgent.clone({ model: this.options.model }),
-            [
-              user7(
-                `Paraphrase this question ${this.options.count} times: "${pair.question}"`
-              )
-            ],
-            {
-              question: pair.question,
-              sql: pair.sql,
-              count: this.options.count,
-              persona: this.options.persona
-            }
+          const { paraphrases } = await toOutput2(
+            generate7(
+              paraphraserAgent.clone({ model: this.options.model }),
+              [
+                user7(
+                  `Paraphrase this question ${this.options.count} times: "${pair.question}"`
+                )
+              ],
+              {
+                question: pair.question,
+                sql: pair.sql,
+                count: this.options.count,
+                persona: this.options.persona
+              }
+            )
           );
-          return experimental_output.paraphrases.map((paraphrase) => ({
+          return paraphrases.map((paraphrase) => ({
             question: paraphrase,
             sql: pair.sql,
             context: pair.context,
@@ -1545,14 +1625,14 @@ var BreadthEvolver = class extends PairProducer {
 // packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
 import { groq as groq7 } from "@ai-sdk/groq";
 import {
-  NoObjectGeneratedError,
-  NoOutputGeneratedError,
+  NoObjectGeneratedError as NoObjectGeneratedError2,
+  NoOutputGeneratedError as NoOutputGeneratedError2,
   defaultSettingsMiddleware as defaultSettingsMiddleware4,
   wrapLanguageModel as wrapLanguageModel4
 } from "ai";
 import dedent6 from "dedent";
 import pLimit3 from "p-limit";
-import pRetry from "p-retry";
+import pRetry2 from "p-retry";
 import z7 from "zod";
 import { agent as agent7, generate as generate8, user as user8 } from "@deepagents/agent";
 var techniqueInstructions = {
@@ -1699,7 +1779,7 @@ var DepthEvolver = class extends PairProducer {
     }
   }
   async #processTask(pair, technique, introspection) {
-    const { experimental_output } = await withRetry(
+    const { experimental_output } = await withRetry2(
       () => generate8(
         questionEvolverAgent.clone({
           model: this.options?.model
@@ -1715,32 +1795,55 @@ var DepthEvolver = class extends PairProducer {
       )
     );
     const evolvedQuestion = experimental_output.evolvedQuestion;
-    const sqlResult = await toSql({
-      input: evolvedQuestion,
-      adapter: this.adapter,
-      introspection,
-      instructions: [],
-      model: this.options?.model
-    });
-    return {
-      question: evolvedQuestion,
-      sql: sqlResult.sql,
-      context: pair.context,
-      success: !sqlResult.errors || sqlResult.errors.length === 0
-    };
+    try {
+      const sqlResult = await toSql({
+        input: evolvedQuestion,
+        adapter: this.adapter,
+        introspection,
+        instructions: [],
+        model: this.options?.model
+      });
+      return {
+        question: evolvedQuestion,
+        sql: sqlResult.sql,
+        context: pair.context,
+        success: !sqlResult.errors || sqlResult.errors.length === 0
+      };
+    } catch (error) {
+      if (UnanswerableSQLError.isInstance(error)) {
+        return {
+          question: evolvedQuestion,
+          sql: "",
+          context: pair.context,
+          success: false,
+          errors: [
+            `Cannot answer the question ${evolvedQuestion} because ${error.message}`
+          ]
+        };
+      }
+      throw error;
+    }
   }
 };
-async function withRetry(computation) {
-  return pRetry(computation, {
+async function withRetry2(computation) {
+  return pRetry2(computation, {
     retries: 3,
     shouldRetry: (context2) => {
-      return NoObjectGeneratedError.isInstance(context2.error) || NoOutputGeneratedError.isInstance(context2.error);
+      console.log({
+        NoObjectGeneratedError: NoObjectGeneratedError2.isInstance(
+          context2.error
+        ),
+        NoOutputGeneratedError: NoOutputGeneratedError2.isInstance(
+          context2.error
+        )
+      });
+      return NoObjectGeneratedError2.isInstance(context2.error) || NoOutputGeneratedError2.isInstance(context2.error);
     },
     onFailedAttempt(context2) {
       console.log(
         `Attempt ${context2.attemptNumber} failed. There are ${context2.retriesLeft} retries left.`
       );
-      console.error(context2.error);
+      console.dir(context2.error, { depth: null });
     }
   });
 }