npm - @deepagents/text2sql - Versions diffs - 0.25.0 → 0.27.0 - Mend

@deepagents/text2sql 0.25.0 → 0.27.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +1 -1
package/dist/index.d.ts +0 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +212 -285
package/dist/index.js.map +4 -4
package/dist/lib/adapters/adapter.d.ts +6 -0
package/dist/lib/adapters/adapter.d.ts.map +1 -1
package/dist/lib/adapters/bigquery/index.js +18 -1
package/dist/lib/adapters/bigquery/index.js.map +2 -2
package/dist/lib/adapters/groundings/abstract.grounding.d.ts +2 -1
package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -1
package/dist/lib/adapters/groundings/index.js.map +2 -2
package/dist/lib/adapters/mysql/index.js +18 -1
package/dist/lib/adapters/mysql/index.js.map +2 -2
package/dist/lib/adapters/postgres/index.js +18 -1
package/dist/lib/adapters/postgres/index.js.map +2 -2
package/dist/lib/adapters/runtime-scope.d.ts +14 -0
package/dist/lib/adapters/runtime-scope.d.ts.map +1 -0
package/dist/lib/adapters/spreadsheet/index.js +18 -1
package/dist/lib/adapters/spreadsheet/index.js.map +2 -2
package/dist/lib/adapters/sqlite/index.js +18 -1
package/dist/lib/adapters/sqlite/index.js.map +2 -2
package/dist/lib/adapters/sqlserver/index.js +18 -1
package/dist/lib/adapters/sqlserver/index.js.map +2 -2
package/dist/lib/agents/exceptions.d.ts +22 -0
package/dist/lib/agents/exceptions.d.ts.map +1 -1
package/dist/lib/agents/result-tools.d.ts.map +1 -1
package/dist/lib/fragments/schema.d.ts +2 -1
package/dist/lib/fragments/schema.d.ts.map +1 -1
package/dist/lib/instructions.d.ts +1 -9
package/dist/lib/instructions.d.ts.map +1 -1
package/dist/lib/sql.d.ts +0 -3
package/dist/lib/sql.d.ts.map +1 -1
package/dist/lib/synthesis/index.js +428 -621
package/dist/lib/synthesis/index.js.map +4 -4
package/dist/lib/synthesis/synthesizers/index.d.ts +1 -2
package/dist/lib/synthesis/synthesizers/index.d.ts.map +1 -1
package/package.json +7 -6
package/dist/lib/agents/developer.agent.d.ts +0 -41
package/dist/lib/agents/developer.agent.d.ts.map +0 -1
package/dist/lib/agents/teachables.agent.d.ts +0 -10
package/dist/lib/agents/teachables.agent.d.ts.map +0 -1
package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +0 -20
package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +0 -1

package/dist/lib/synthesis/index.js (changed)

@@ -711,38 +711,10 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
   }
 };
-// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
-import pLimit from "p-limit";
-// packages/text2sql/src/lib/agents/exceptions.ts
-var sqlValidationMarker = Symbol("SQLValidationError");
-var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
-var SQLValidationError = class _SQLValidationError extends Error {
-  [sqlValidationMarker];
-  constructor(message) {
-    super(message);
-    this.name = "SQLValidationError";
-    this[sqlValidationMarker] = true;
-  }
-  static isInstance(error) {
-    return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
-  }
-};
-var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
-  [unanswerableSqlMarker];
-  constructor(message) {
-    super(message);
-    this.name = "UnanswerableSQLError";
-    this[unanswerableSqlMarker] = true;
-  }
-  static isInstance(error) {
-    return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
-  }
-};
-// packages/text2sql/src/lib/agents/question.agent.ts
+// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
 import { groq as groq4 } from "@ai-sdk/groq";
 import dedent4 from "dedent";
+import pLimit from "p-limit";
 import z4 from "zod";
 import "@deepagents/agent";
 import {
@@ -750,107 +722,223 @@ import {
   InMemoryContextStore as InMemoryContextStore4,
   fragment as fragment4,
   guardrail,
-  persona as persona4,
+  persona as personaFragment,
   structuredOutput as structuredOutput4,
   user as user4
 } from "@deepagents/context";
-var complexityInstructions = {
-  simple: dedent4`
-    Generate simple questions that require:
-    - Basic SELECT with single table
-    - Simple WHERE clauses with one condition
-    - COUNT(*) or basic aggregations
-    - No joins required
-    Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
-  `,
-  moderate: dedent4`
-    Generate moderate questions that require:
-    - JOINs between 2-3 tables
-    - Multiple WHERE conditions (AND/OR)
-    - GROUP BY with HAVING clauses
-    - ORDER BY with LIMIT
-    - Basic subqueries
-    Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
-  `,
-  complex: dedent4`
-    Generate complex questions that require:
-    - Multiple JOINs (3+ tables)
-    - Nested subqueries or CTEs
-    - Complex aggregations with multiple GROUP BY columns
-    - CASE expressions
-    - Date/time calculations
-    Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
-  `,
-  "high complex": dedent4`
-    Generate highly complex questions that require advanced SQL features:
-    - Window functions (ROW_NUMBER, RANK, DENSE_RANK)
-    - LAG, LEAD for comparisons
-    - Running totals (SUM OVER)
-    - Moving averages
-    - PARTITION BY clauses
-    - Complex CTEs with multiple levels
-    Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
-  `
+// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
+var ALL_STYLES = [
+  "formal",
+  // Professional business language
+  "colloquial",
+  // Casual everyday speech
+  "imperative",
+  // Commands: "Show me...", "Get..."
+  "interrogative",
+  // Questions: "What is...", "How many..."
+  "descriptive",
+  // Verbose, detailed
+  "concise",
+  // Brief, minimal
+  "vague",
+  // Ambiguous, hedging
+  "metaphorical",
+  // Figurative language
+  "conversational"
+  // Chat-like
+];
+var styleInstructions = {
+  formal: "Use professional business language, complete sentences, no slang",
+  colloquial: "Use casual everyday speech, contractions, informal tone",
+  imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
+  interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
+  descriptive: "Use detailed, verbose phrasing with extra context",
+  concise: "Use minimal words, telegram-style brevity",
+  vague: "Be intentionally ambiguous, use hedging language",
+  metaphorical: "Use figurative language, analogies, creative phrasing",
+  conversational: "Chat-like tone, as if talking to a colleague"
 };
-var outputSchema2 = z4.object({
-  questions: z4.array(z4.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
+// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
+var paraphraserOutputSchema = z4.object({
+  paraphrases: z4.array(
+    z4.string().describe("A paraphrased version of the original question")
+  ).min(1).describe("List of paraphrased questions that would produce the same SQL")
 });
-async function generateQuestions(params) {
-  const { introspection, complexity, count, prompt, model } = params;
+async function paraphraseQuestion(params) {
   const context = new ContextEngine4({
     store: new InMemoryContextStore4(),
-    chatId: `question-gen-${crypto.randomUUID()}`,
+    chatId: `paraphraser-${crypto.randomUUID()}`,
     userId: "system"
   });
+  const personaInstruction = params.persona ? dedent4`
+        <persona role="${params.persona.role}">
+          ${params.persona.perspective}
+          Paraphrase the question as this persona would naturally ask it.
+          Use their vocabulary, priorities, and framing style.
+        </persona>
+      ` : "";
+  const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent4`
+        <communication_styles>
+          Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
+          Style definitions:
+          ${params.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
+          Distribute paraphrases across these styles for variety.
+        </communication_styles>
+      ` : "";
   context.set(
-    persona4({
-      name: "question_generator",
-      role: "You are a synthetic data generator specializing in creating realistic natural language questions that users might ask about a database.",
-      objective: "Generate diverse, realistic natural language questions that match the specified complexity level"
+    personaFragment({
+      name: "question_paraphraser",
+      role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
+      objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
     }),
-    fragment4("database_schema", introspection || ""),
+    fragment4("original_question", params.question),
     fragment4(
-      "complexity",
-      { level: complexity },
-      complexityInstructions[complexity]
+      "reference_sql",
+      params.sql,
+      "This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
     ),
+    ...personaInstruction ? [fragment4("persona", personaInstruction)] : [],
+    ...styleInstruction ? [fragment4("communication_styles", styleInstruction)] : [],
     fragment4(
       "task",
       dedent4`
-        Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
-        The questions should:
-        1. Match the complexity requirements above
-        2. Use natural business language, not technical SQL terms
-        3. Be realistic questions a non-technical user would actually ask
-        4. Cover different tables and relationships when possible
+        Generate exactly ${params.count} paraphrased versions of the original question.
+        Requirements:
+        1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
+        2. Vary the sentence structure, word choice, and phrasing style
+        3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
+        4. Keep paraphrases realistic - how actual users would ask
+        5. Do not add or remove any conditions, filters, or requirements from the original
+        ${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
       `
     ),
+    guardrail({ rule: "NEVER change what data is being requested" }),
     guardrail({
-      rule: "Questions MUST ONLY reference tables and columns that exist in the schema above"
-    }),
-    guardrail({
-      rule: "Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema"
-    }),
-    guardrail({
-      rule: "DO NOT invent or assume tables/columns that are not explicitly shown in the schema"
+      rule: "NEVER add filters, aggregations, or conditions not in the original"
     }),
     guardrail({
-      rule: "Use natural language without SQL keywords like SELECT, WHERE, etc."
+      rule: "NEVER remove any specificity from the original question"
     }),
     guardrail({
-      rule: "All questions must match the specified complexity level"
+      rule: "All paraphrases must be answerable by the exact same SQL query"
     }),
     user4(
-      prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`
+      `Paraphrase this question ${params.count} times: "${params.question}"`
     )
   );
-  const questionOutput = structuredOutput4({
-    model: model ?? groq4("openai/gpt-oss-20b"),
+  const paraphraserOutput = structuredOutput4({
+    model: params.model ?? groq4("openai/gpt-oss-20b"),
     context,
-    schema: outputSchema2
+    schema: paraphraserOutputSchema
   });
-  return questionOutput.generate();
+  return paraphraserOutput.generate();
 }
+var BreadthEvolver = class extends PairProducer {
+  /**
+   * @param source - Source pairs or producer to evolve
+   * @param options - Evolution options including count, persona, and concurrency
+   */
+  constructor(source, options) {
+    super();
+    this.source = source;
+    this.options = options;
+    this.#limit = pLimit(this.options.concurrency ?? 4);
+  }
+  #limit;
+  /**
+   * Batch pairs within each chunk for concurrent processing.
+   * Uses pLimit for concurrency control, yields results per pair after chunk completes.
+   */
+  async *produce() {
+    for await (const chunk of this.from(this.source)) {
+      const tasks = chunk.map(
+        (pair) => this.#limit(async () => {
+          const result = await paraphraseQuestion({
+            question: pair.question,
+            sql: pair.sql,
+            count: this.options.count,
+            persona: this.options.persona,
+            model: this.options.model
+          });
+          return result.paraphrases.map((paraphrase) => ({
+            question: paraphrase,
+            sql: pair.sql,
+            context: pair.context,
+            success: pair.success
+          }));
+        })
+      );
+      const results = await Promise.all(tasks);
+      yield results.flat();
+    }
+  }
+};
+// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
+import { groq as groq6 } from "@ai-sdk/groq";
+import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
+import dedent6 from "dedent";
+import pLimit2 from "p-limit";
+import pRetry2 from "p-retry";
+import z6 from "zod";
+import "@deepagents/agent";
+import {
+  ContextEngine as ContextEngine6,
+  InMemoryContextStore as InMemoryContextStore6,
+  fragment as fragment6,
+  guardrail as guardrail3,
+  persona as persona5,
+  structuredOutput as structuredOutput6,
+  user as user6
+} from "@deepagents/context";
+// packages/text2sql/src/lib/agents/exceptions.ts
+var sqlValidationMarker = Symbol("SQLValidationError");
+var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
+var sqlScopeMarker = Symbol("SQLScopeError");
+var SQLValidationError = class _SQLValidationError extends Error {
+  [sqlValidationMarker];
+  constructor(message) {
+    super(message);
+    this.name = "SQLValidationError";
+    this[sqlValidationMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
+  }
+};
+var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
+  [unanswerableSqlMarker];
+  constructor(message) {
+    super(message);
+    this.name = "UnanswerableSQLError";
+    this[unanswerableSqlMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
+  }
+};
+var SQLScopeError = class _SQLScopeError extends Error {
+  [sqlScopeMarker];
+  payload;
+  errorType;
+  constructor(payload) {
+    super(JSON.stringify(payload));
+    this.name = "SQLScopeError";
+    this.payload = payload;
+    this.errorType = payload.error_type;
+    this[sqlScopeMarker] = true;
+  }
+  static isInstance(error) {
+    return error instanceof _SQLScopeError && error[sqlScopeMarker] === true;
+  }
+};
 // packages/text2sql/src/lib/agents/sql.agent.ts
 import { groq as groq5 } from "@ai-sdk/groq";
@@ -875,7 +963,7 @@ import {
   fragment as fragment5,
   guardrail as guardrail2,
   hint,
-  persona as persona5,
+  persona as persona4,
   policy,
   structuredOutput as structuredOutput5,
   user as user5,
@@ -1099,7 +1187,7 @@ async function toSql(options) {
         userId: "system"
       });
       context.set(
-        persona5({
+        persona4({
           name: "Freya",
           role: SQL_AGENT_ROLE,
           objective: SQL_AGENT_OBJECTIVE
@@ -1265,287 +1353,9 @@ async function withRetry(computation, options = { retries: 3 }) {
   );
 }
-// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
-var SchemaSynthesizer = class extends PairProducer {
-  /**
-   * @param adapter - Database adapter for schema introspection and SQL validation
-   * @param options - Synthesis configuration including count, complexity, and concurrency
-   */
-  constructor(adapter, options) {
-    super();
-    this.adapter = adapter;
-    this.options = options;
-    this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "moderate"];
-    this.#personas = this.options.personas ?? [void 0];
-    this.#limit = pLimit(this.options.concurrency ?? 5);
-  }
-  #complexities = [];
-  #personas = [];
-  #limit;
-  /**
-   * Generates question-SQL pairs by iterating through all persona × complexity combinations.
-   * Uses parallel processing bounded by the configured concurrency limit.
-   * Yields results as each combination completes (streaming pattern).
-   * @returns Generated pairs from all combinations
-   */
-  async *produce() {
-    const introspection = "";
-    const combinations = this.#personas.flatMap(
-      (persona8) => this.#complexities.map((complexity) => ({ persona: persona8, complexity }))
-    );
-    for (const { persona: persona8, complexity } of combinations) {
-      const pairs = await this.#processCombination(
-        introspection,
-        persona8,
-        complexity
-      );
-      if (pairs.length) {
-        yield pairs;
-      }
-    }
-  }
-  /**
-   * Processes a single persona × complexity combination by generating questions
-   * and converting each to SQL in parallel.
-   */
-  async #processCombination(introspection, persona8, complexity) {
-    const personaContext = persona8 ? `As ${persona8.role}, ${persona8.perspective}
-Generate questions this persona would ask.` : void 0;
-    const prompt = personaContext ? `${personaContext}
-Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
-    const { questions } = await this.#limit(
-      () => generateQuestions({
-        introspection,
-        complexity,
-        count: this.options.count,
-        prompt,
-        model: this.options.model
-      })
-    );
-    const pairs = await Promise.all(
-      questions.map(async (question) => {
-        const result = await this.#limit(async () => {
-          try {
-            return await toSql({
-              input: question,
-              adapter: this.adapter,
-              fragments: this.options.teachings ?? [],
-              model: this.options.model
-            });
-          } catch (error) {
-            if (UnanswerableSQLError.isInstance(error)) {
-              return {
-                attempts: 0,
-                sql: "",
-                errors: [
-                  `Cannot answer the question ${question} because ${error.message}`
-                ]
-              };
-            }
-            throw error;
-          }
-        });
-        return {
-          question,
-          sql: result.sql,
-          success: !result.errors || result.errors.length === 0
-        };
-      })
-    );
-    return pairs;
-  }
-};
-// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
-import { groq as groq6 } from "@ai-sdk/groq";
-import dedent6 from "dedent";
-import pLimit2 from "p-limit";
-import z6 from "zod";
-import "@deepagents/agent";
-import {
-  ContextEngine as ContextEngine6,
-  InMemoryContextStore as InMemoryContextStore6,
-  fragment as fragment6,
-  guardrail as guardrail3,
-  persona as personaFragment,
-  structuredOutput as structuredOutput6,
-  user as user6
-} from "@deepagents/context";
-// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
-var ALL_STYLES = [
-  "formal",
-  // Professional business language
-  "colloquial",
-  // Casual everyday speech
-  "imperative",
-  // Commands: "Show me...", "Get..."
-  "interrogative",
-  // Questions: "What is...", "How many..."
-  "descriptive",
-  // Verbose, detailed
-  "concise",
-  // Brief, minimal
-  "vague",
-  // Ambiguous, hedging
-  "metaphorical",
-  // Figurative language
-  "conversational"
-  // Chat-like
-];
-var styleInstructions = {
-  formal: "Use professional business language, complete sentences, no slang",
-  colloquial: "Use casual everyday speech, contractions, informal tone",
-  imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
-  interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
-  descriptive: "Use detailed, verbose phrasing with extra context",
-  concise: "Use minimal words, telegram-style brevity",
-  vague: "Be intentionally ambiguous, use hedging language",
-  metaphorical: "Use figurative language, analogies, creative phrasing",
-  conversational: "Chat-like tone, as if talking to a colleague"
-};
-// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
-var paraphraserOutputSchema = z6.object({
-  paraphrases: z6.array(
-    z6.string().describe("A paraphrased version of the original question")
-  ).min(1).describe("List of paraphrased questions that would produce the same SQL")
-});
-async function paraphraseQuestion(params) {
-  const context = new ContextEngine6({
-    store: new InMemoryContextStore6(),
-    chatId: `paraphraser-${crypto.randomUUID()}`,
-    userId: "system"
-  });
-  const personaInstruction = params.persona ? dedent6`
-        <persona role="${params.persona.role}">
-          ${params.persona.perspective}
-          Paraphrase the question as this persona would naturally ask it.
-          Use their vocabulary, priorities, and framing style.
-        </persona>
-      ` : "";
-  const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
-        <communication_styles>
-          Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
-          Style definitions:
-          ${params.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
-          Distribute paraphrases across these styles for variety.
-        </communication_styles>
-      ` : "";
-  context.set(
-    personaFragment({
-      name: "question_paraphraser",
-      role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
-      objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
-    }),
-    fragment6("original_question", params.question),
-    fragment6(
-      "reference_sql",
-      params.sql,
-      "This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
-    ),
-    ...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
-    ...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
-    fragment6(
-      "task",
-      dedent6`
-        Generate exactly ${params.count} paraphrased versions of the original question.
-        Requirements:
-        1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
-        2. Vary the sentence structure, word choice, and phrasing style
-        3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
-        4. Keep paraphrases realistic - how actual users would ask
-        5. Do not add or remove any conditions, filters, or requirements from the original
-        ${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
-      `
-    ),
-    guardrail3({ rule: "NEVER change what data is being requested" }),
-    guardrail3({
-      rule: "NEVER add filters, aggregations, or conditions not in the original"
-    }),
-    guardrail3({
-      rule: "NEVER remove any specificity from the original question"
-    }),
-    guardrail3({
-      rule: "All paraphrases must be answerable by the exact same SQL query"
-    }),
-    user6(
-      `Paraphrase this question ${params.count} times: "${params.question}"`
-    )
-  );
-  const paraphraserOutput = structuredOutput6({
-    model: params.model ?? groq6("openai/gpt-oss-20b"),
-    context,
-    schema: paraphraserOutputSchema
-  });
-  return paraphraserOutput.generate();
-}
-var BreadthEvolver = class extends PairProducer {
-  /**
-   * @param source - Source pairs or producer to evolve
-   * @param options - Evolution options including count, persona, and concurrency
-   */
-  constructor(source, options) {
-    super();
-    this.source = source;
-    this.options = options;
-    this.#limit = pLimit2(this.options.concurrency ?? 4);
-  }
-  #limit;
-  /**
-   * Batch pairs within each chunk for concurrent processing.
-   * Uses pLimit for concurrency control, yields results per pair after chunk completes.
-   */
-  async *produce() {
-    for await (const chunk of this.from(this.source)) {
-      const tasks = chunk.map(
-        (pair) => this.#limit(async () => {
-          const result = await paraphraseQuestion({
-            question: pair.question,
-            sql: pair.sql,
-            count: this.options.count,
-            persona: this.options.persona,
-            model: this.options.model
-          });
-          return result.paraphrases.map((paraphrase) => ({
-            question: paraphrase,
-            sql: pair.sql,
-            context: pair.context,
-            success: pair.success
-          }));
-        })
-      );
-      const results = await Promise.all(tasks);
-      yield results.flat();
-    }
-  }
-};
 // packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
-import { groq as groq7 } from "@ai-sdk/groq";
-import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
-import dedent7 from "dedent";
-import pLimit3 from "p-limit";
-import pRetry2 from "p-retry";
-import z7 from "zod";
-import "@deepagents/agent";
-import {
-  ContextEngine as ContextEngine7,
-  InMemoryContextStore as InMemoryContextStore7,
-  fragment as fragment7,
-  guardrail as guardrail4,
-  persona as persona6,
-  structuredOutput as structuredOutput7,
-  user as user7
-} from "@deepagents/context";
 var techniqueInstructions = {
-  "add-aggregation": dedent7`
+  "add-aggregation": dedent6`
     Add aggregation requirements to the question.
     Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
     Examples:
@@ -1553,7 +1363,7 @@ var techniqueInstructions = {
     - "List products" → "What is the average price per category?"
     - "Get employees" → "How many employees are in each department?"
   `,
-  "add-filter": dedent7`
+  "add-filter": dedent6`
     Add filtering conditions to the question.
     Transform it to require WHERE clauses with specific conditions.
     Examples:
@@ -1561,7 +1371,7 @@ var techniqueInstructions = {
     - "List customers" → "List customers who have made more than 5 purchases"
     - "Get products" → "Get products with price above $100"
   `,
-  "add-join": dedent7`
+  "add-join": dedent6`
     Add requirements that need data from related tables.
     Transform it to require JOIN operations between multiple tables.
     Examples:
@@ -1569,7 +1379,7 @@ var techniqueInstructions = {
     - "List products" → "List products with their supplier information"
     - "Get employees" → "Get employees with their department and manager names"
   `,
-  "add-reasoning": dedent7`
+  "add-reasoning": dedent6`
     Add multi-step reasoning requirements.
     Transform it to require logical deduction, comparisons, or derived calculations.
     Examples:
@@ -1577,7 +1387,7 @@ var techniqueInstructions = {
     - "List products" → "Which products are underperforming compared to their category average?"
     - "Get revenue" → "Which month had the highest growth compared to the previous month?"
   `,
-  hypothetical: dedent7`
+  hypothetical: dedent6`
     Add a hypothetical or speculative scenario.
     Transform it to require applying calculations or projections.
     Examples:
@@ -1586,36 +1396,36 @@ var techniqueInstructions = {
     - "Get costs" → "What would be the impact of a 10% discount on profit margins?"
   `
 };
-var evolverOutputSchema = z7.object({
-  evolvedQuestion: z7.string().describe("The evolved, more complex version of the original question")
+var evolverOutputSchema = z6.object({
+  evolvedQuestion: z6.string().describe("The evolved, more complex version of the original question")
 });
 async function evolveQuestion(params) {
-  const context = new ContextEngine7({
-    store: new InMemoryContextStore7(),
+  const context = new ContextEngine6({
+    store: new InMemoryContextStore6(),
     chatId: `evolver-${crypto.randomUUID()}`,
     userId: "system"
   });
   context.set(
-    persona6({
+    persona5({
       name: "question_evolver",
       role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
       objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
     }),
-    fragment7("original_question", params.question),
-    fragment7(
+    fragment6("original_question", params.question),
+    fragment6(
       "original_sql",
       params.sql,
       "(This shows what the original question required)"
     ),
-    fragment7("database_schema", params.schema),
-    fragment7(
+    fragment6("database_schema", params.schema),
+    fragment6(
       "technique",
       { name: params.technique },
       params.techniqueInstruction
     ),
-    fragment7(
+    fragment6(
       "task",
-      dedent7`
+      dedent6`
         Evolve the original question using the "${params.technique}" technique.
         Requirements:
@@ -1627,22 +1437,22 @@ async function evolveQuestion(params) {
         6. The evolved question should build upon the original topic/domain
       `
     ),
-    guardrail4({
+    guardrail3({
       rule: "The evolved question MUST require more complex SQL than the original"
     }),
-    guardrail4({
+    guardrail3({
       rule: "Do not ask for data that does not exist in the schema"
     }),
-    guardrail4({
+    guardrail3({
       rule: "Keep the question grounded in the same domain as the original"
     }),
-    guardrail4({ rule: "Make sure the question is clear and unambiguous" }),
-    user7(
+    guardrail3({ rule: "Make sure the question is clear and unambiguous" }),
+    user6(
       `Evolve this question using "${params.technique}": "${params.question}"`
     )
   );
-  const evolverOutput = structuredOutput7({
-    model: params.model ?? groq7("openai/gpt-oss-20b"),
+  const evolverOutput = structuredOutput6({
+    model: params.model ?? groq6("openai/gpt-oss-20b"),
     context,
     schema: evolverOutputSchema
   });
@@ -1666,7 +1476,7 @@ var DepthEvolver = class extends PairProducer {
     this.source = source;
     this.adapter = adapter;
     this.options = options;
-    this.#limit = pLimit3(this.options?.concurrency ?? 4);
+    this.#limit = pLimit2(this.options?.concurrency ?? 4);
   }
   #limit;
   /**
@@ -1757,28 +1567,28 @@ async function withRetry2(computation) {
 }
 // packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
-import { groq as groq8 } from "@ai-sdk/groq";
-import dedent8 from "dedent";
-import z8 from "zod";
+import { groq as groq7 } from "@ai-sdk/groq";
+import dedent7 from "dedent";
+import z7 from "zod";
 import "@deepagents/agent";
 import {
-  ContextEngine as ContextEngine8,
-  InMemoryContextStore as InMemoryContextStore8,
+  ContextEngine as ContextEngine7,
+  InMemoryContextStore as InMemoryContextStore7,
   XmlRenderer,
-  fragment as fragment8,
-  guardrail as guardrail5,
+  fragment as fragment7,
+  guardrail as guardrail4,
   persona as personaFragment2,
-  structuredOutput as structuredOutput8,
-  user as user8
+  structuredOutput as structuredOutput7,
+  user as user7
 } from "@deepagents/context";
-var outputSchema3 = z8.object({
-  personas: z8.array(
-    z8.object({
-      role: z8.string().describe("The job title or role of this persona"),
-      perspective: z8.string().describe(
+var outputSchema2 = z7.object({
+  personas: z7.array(
+    z7.object({
+      role: z7.string().describe("The job title or role of this persona"),
+      perspective: z7.string().describe(
         "Rich description of what this persona cares about when querying the database"
       ),
-      styles: z8.array(z8.enum(ALL_STYLES)).min(1).max(3).describe(
+      styles: z7.array(z7.enum(ALL_STYLES)).min(1).max(3).describe(
         "Typical communication styles for this persona (1-3 styles)"
       )
     })
@@ -1787,8 +1597,8 @@ var outputSchema3 = z8.object({
 async function generatePersonas(schemaFragments, options) {
   const schema = new XmlRenderer().render(schemaFragments);
   const count = options?.count ?? 5;
-  const context = new ContextEngine8({
-    store: new InMemoryContextStore8(),
+  const context = new ContextEngine7({
+    store: new InMemoryContextStore7(),
     chatId: `persona-gen-${crypto.randomUUID()}`,
     userId: "system"
   });
@@ -1798,10 +1608,10 @@ async function generatePersonas(schemaFragments, options) {
       role: "You are an expert at understanding database schemas and inferring who would use them.",
       objective: "Generate realistic personas representing users who would query this database"
     }),
-    fragment8("database_schema", schema),
-    fragment8(
+    fragment7("database_schema", schema),
+    fragment7(
       "task",
-      dedent8`
+      dedent7`
         Analyze the database schema and generate realistic personas representing
         the different types of users who would query this database.
@@ -1832,9 +1642,9 @@ async function generatePersonas(schemaFragments, options) {
         - Styles should match how this persona would naturally communicate
       `
     ),
-    fragment8(
+    fragment7(
       "example",
-      dedent8`
+      dedent7`
         For an e-commerce schema with orders, customers, products tables:
         {
@@ -1850,237 +1660,235 @@ async function generatePersonas(schemaFragments, options) {
         }
       `
     ),
-    guardrail5({
+    guardrail4({
       rule: "Only generate personas relevant to the actual schema provided"
     }),
-    guardrail5({
+    guardrail4({
       rule: "Do not invent tables or data that do not exist in the schema"
     }),
-    guardrail5({
+    guardrail4({
       rule: "Ensure perspectives are specific to the domain, not generic"
     }),
-    user8(
+    user7(
       `Generate exactly ${count} distinct personas who would query this database.`
     )
   );
-  const personaOutput = structuredOutput8({
-    model: options?.model ?? groq8("openai/gpt-oss-20b"),
+  const personaOutput = structuredOutput7({
+    model: options?.model ?? groq7("openai/gpt-oss-20b"),
     context,
-    schema: outputSchema3
+    schema: outputSchema2
   });
   const output = await personaOutput.generate();
   return output.personas;
 }
-// packages/text2sql/src/lib/synthesis/synthesizers/teachings-generator.ts
-import { XmlRenderer as XmlRenderer2 } from "@deepagents/context";
+// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
+import pLimit3 from "p-limit";
-// packages/text2sql/src/lib/agents/teachables.agent.ts
-import { groq as groq9 } from "@ai-sdk/groq";
-import dedent9 from "dedent";
-import z9 from "zod";
+// packages/text2sql/src/lib/agents/question.agent.ts
+import { groq as groq8 } from "@ai-sdk/groq";
+import dedent8 from "dedent";
+import z8 from "zod";
 import "@deepagents/agent";
 import {
-  ContextEngine as ContextEngine9,
-  InMemoryContextStore as InMemoryContextStore9,
-  analogy,
-  clarification,
-  example as example2,
-  explain,
-  fragment as fragment9,
-  guardrail as guardrail6,
-  hint as hint2,
-  persona as persona7,
-  quirk,
-  structuredOutput as structuredOutput9,
-  styleGuide,
-  term,
-  user as user9,
-  workflow as workflow2
+  ContextEngine as ContextEngine8,
+  InMemoryContextStore as InMemoryContextStore8,
+  fragment as fragment8,
+  guardrail as guardrail5,
+  persona as persona6,
+  structuredOutput as structuredOutput8,
+  user as user8
 } from "@deepagents/context";
-var outputSchema4 = z9.object({
-  terms: z9.array(z9.object({ name: z9.string(), definition: z9.string() })).optional().describe("Domain terminology definitions"),
-  hints: z9.array(z9.object({ text: z9.string() })).optional().describe("Helpful hints for SQL generation"),
-  guardrails: z9.array(
-    z9.object({
-      rule: z9.string(),
-      reason: z9.string().optional(),
-      action: z9.string().optional()
-    })
-  ).optional().describe("Safety rules and constraints"),
-  explains: z9.array(
-    z9.object({
-      concept: z9.string(),
-      explanation: z9.string(),
-      therefore: z9.string().optional()
-    })
-  ).optional().describe("Concept explanations"),
-  examples: z9.array(
-    z9.object({
-      question: z9.string(),
-      answer: z9.string(),
-      note: z9.string().optional()
-    })
-  ).optional().describe("Example question-answer pairs"),
-  clarifications: z9.array(z9.object({ when: z9.string(), ask: z9.string(), reason: z9.string() })).optional().describe("When to ask for clarification"),
-  workflows: z9.array(
-    z9.object({
-      task: z9.string(),
-      steps: z9.array(z9.string()).min(1),
-      triggers: z9.array(z9.string()).optional(),
-      notes: z9.string().optional()
-    })
-  ).optional().describe("Multi-step workflows"),
-  quirks: z9.array(z9.object({ issue: z9.string(), workaround: z9.string() })).optional().describe("Known issues and workarounds"),
-  styleGuides: z9.array(
-    z9.object({
-      prefer: z9.string(),
-      never: z9.string().optional(),
-      always: z9.string().optional()
-    })
-  ).optional().describe("SQL style preferences"),
-  analogies: z9.array(
-    z9.object({
-      concepts: z9.array(z9.string()).min(2),
-      relationship: z9.string(),
-      insight: z9.string().optional(),
-      therefore: z9.string().optional(),
-      pitfall: z9.string().optional()
-    })
-  ).optional().describe("Concept analogies")
+var complexityInstructions = {
+  simple: dedent8`
+    Generate simple questions that require:
+    - Basic SELECT with single table
+    - Simple WHERE clauses with one condition
+    - COUNT(*) or basic aggregations
+    - No joins required
+    Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
+  `,
+  moderate: dedent8`
+    Generate moderate questions that require:
+    - JOINs between 2-3 tables
+    - Multiple WHERE conditions (AND/OR)
+    - GROUP BY with HAVING clauses
+    - ORDER BY with LIMIT
+    - Basic subqueries
+    Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
+  `,
+  complex: dedent8`
+    Generate complex questions that require:
+    - Multiple JOINs (3+ tables)
+    - Nested subqueries or CTEs
+    - Complex aggregations with multiple GROUP BY columns
+    - CASE expressions
+    - Date/time calculations
+    Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
+  `,
+  "high complex": dedent8`
+    Generate highly complex questions that require advanced SQL features:
+    - Window functions (ROW_NUMBER, RANK, DENSE_RANK)
+    - LAG, LEAD for comparisons
+    - Running totals (SUM OVER)
+    - Moving averages
+    - PARTITION BY clauses
+    - Complex CTEs with multiple levels
+    Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
+  `
+};
+var outputSchema3 = z8.object({
+  questions: z8.array(z8.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
 });
-async function toTeachings(input, options) {
-  const context = new ContextEngine9({
-    store: new InMemoryContextStore9(),
-    chatId: `teachables-gen-${crypto.randomUUID()}`,
+async function generateQuestions(params) {
+  const { introspection, complexity, count, prompt, model } = params;
+  const context = new ContextEngine8({
+    store: new InMemoryContextStore8(),
+    chatId: `question-gen-${crypto.randomUUID()}`,
     userId: "system"
   });
   context.set(
-    persona7({
-      name: "teachables-author",
-      role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
-      objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
+    persona6({
+      name: "question_generator",
+      role: "You are a synthetic data generator specializing in creating realistic natural language questions that users might ask about a database.",
+      objective: "Generate diverse, realistic natural language questions that match the specified complexity level"
     }),
-    fragment9("database_schema", input.schema),
-    ...input.context ? [fragment9("additional_context", input.context)] : [],
-    fragment9(
-      "output_structure",
-      dedent9`
-        Output a JSON object with these optional arrays (include only relevant ones):
-        - terms: [{ name: string, definition: string }] - Domain terminology
-        - hints: [{ text: string }] - Helpful SQL generation hints
-        - guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
-        - explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
-        - examples: [{ question: string, answer: string, note?: string }] - Q&A examples
-        - clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
-        - workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
-        - quirks: [{ issue: string, workaround: string }] - Known issues
-        - styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
-        - analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
-      `
+    fragment8("database_schema", introspection || ""),
+    fragment8(
+      "complexity",
+      { level: complexity },
+      complexityInstructions[complexity]
     ),
-    fragment9(
+    fragment8(
       "task",
-      dedent9`
-        1. Analyze the schema to infer domain, relationships, and sensitive columns.
-        2. Generate 3-10 fragments total across all categories, prioritizing:
-           - guardrails for PII columns (email, ssn, phone, etc)
-           - hints for status/enum columns
-           - clarifications for ambiguous terms
-        3. Ground everything in the schema - do not invent tables/columns.
-        4. Only include categories that are relevant to this schema.
+      dedent8`
+        Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
+        The questions should:
+        1. Match the complexity requirements above
+        2. Use natural business language, not technical SQL terms
+        3. Be realistic questions a non-technical user would actually ask
+        4. Cover different tables and relationships when possible
       `
     ),
-    user9(
-      `Analyze this database schema and generate fragments that will help an AI generate accurate SQL queries.`
+    guardrail5({
+      rule: "Questions MUST ONLY reference tables and columns that exist in the schema above"
+    }),
+    guardrail5({
+      rule: "Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema"
+    }),
+    guardrail5({
+      rule: "DO NOT invent or assume tables/columns that are not explicitly shown in the schema"
+    }),
+    guardrail5({
+      rule: "Use natural language without SQL keywords like SELECT, WHERE, etc."
+    }),
+    guardrail5({
+      rule: "All questions must match the specified complexity level"
+    }),
+    user8(
+      prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`
     )
   );
-  const teachablesOutput = structuredOutput9({
-    model: options?.model ?? groq9("openai/gpt-oss-20b"),
+  const questionOutput = structuredOutput8({
+    model: model ?? groq8("openai/gpt-oss-20b"),
     context,
-    schema: outputSchema4
+    schema: outputSchema3
   });
-  const result = await teachablesOutput.generate();
-  const fragments = [];
-  result.terms?.forEach((t) => fragments.push(term(t.name, t.definition)));
-  result.hints?.forEach((h) => fragments.push(hint2(h.text)));
-  result.guardrails?.forEach(
-    (g) => fragments.push(
-      guardrail6({ rule: g.rule, reason: g.reason, action: g.action })
-    )
-  );
-  result.explains?.forEach(
-    (e) => fragments.push(
-      explain({
-        concept: e.concept,
-        explanation: e.explanation,
-        therefore: e.therefore
-      })
-    )
-  );
-  result.examples?.forEach(
-    (e) => fragments.push(
-      example2({ question: e.question, answer: e.answer, note: e.note })
-    )
-  );
-  result.clarifications?.forEach(
-    (c) => fragments.push(
-      clarification({ when: c.when, ask: c.ask, reason: c.reason })
-    )
-  );
-  result.workflows?.forEach(
-    (w) => fragments.push(
-      workflow2({
-        task: w.task,
-        steps: w.steps,
-        triggers: w.triggers,
-        notes: w.notes
-      })
-    )
-  );
-  result.quirks?.forEach(
-    (q) => fragments.push(quirk({ issue: q.issue, workaround: q.workaround }))
-  );
-  result.styleGuides?.forEach(
-    (s) => fragments.push(
-      styleGuide({ prefer: s.prefer, never: s.never, always: s.always })
-    )
-  );
-  result.analogies?.forEach(
-    (a) => fragments.push(
-      analogy({
-        concepts: a.concepts,
-        relationship: a.relationship,
-        insight: a.insight,
-        therefore: a.therefore,
-        pitfall: a.pitfall
-      })
-    )
-  );
-  return fragments;
+  return questionOutput.generate();
 }
-// packages/text2sql/src/lib/synthesis/synthesizers/teachings-generator.ts
-async function generateTeachings(schemaFragments, options) {
-  const schema = new XmlRenderer2().render(schemaFragments);
-  const maxRetries = options?.maxRetries ?? 3;
-  let lastError;
-  for (let attempt = 0; attempt < maxRetries; attempt++) {
-    try {
-      return await toTeachings(
-        { schema, context: options?.context },
-        { model: options?.model }
+// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
+var SchemaSynthesizer = class extends PairProducer {
+  /**
+   * @param adapter - Database adapter for schema introspection and SQL validation
+   * @param options - Synthesis configuration including count, complexity, and concurrency
+   */
+  constructor(adapter, options) {
+    super();
+    this.adapter = adapter;
+    this.options = options;
+    this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "moderate"];
+    this.#personas = this.options.personas ?? [void 0];
+    this.#limit = pLimit3(this.options.concurrency ?? 5);
+  }
+  #complexities = [];
+  #personas = [];
+  #limit;
+  /**
+   * Generates question-SQL pairs by iterating through all persona × complexity combinations.
+   * Uses parallel processing bounded by the configured concurrency limit.
+   * Yields results as each combination completes (streaming pattern).
+   * @returns Generated pairs from all combinations
+   */
+  async *produce() {
+    const introspection = "";
+    const combinations = this.#personas.flatMap(
+      (persona7) => this.#complexities.map((complexity) => ({ persona: persona7, complexity }))
+    );
+    for (const { persona: persona7, complexity } of combinations) {
+      const pairs = await this.#processCombination(
+        introspection,
+        persona7,
+        complexity
       );
-    } catch (error) {
-      lastError = error;
-      const isRetryable = lastError.message.includes("parse") || lastError.message.includes("schema") || lastError.message.includes("No object generated") || lastError.name.includes("AI_");
-      if (!isRetryable) {
-        throw lastError;
+      if (pairs.length) {
+        yield pairs;
       }
     }
   }
-  throw lastError;
-}
+  /**
+   * Processes a single persona × complexity combination by generating questions
+   * and converting each to SQL in parallel.
+   */
+  async #processCombination(introspection, persona7, complexity) {
+    const personaContext = persona7 ? `As ${persona7.role}, ${persona7.perspective}
+Generate questions this persona would ask.` : void 0;
+    const prompt = personaContext ? `${personaContext}
+Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
+    const { questions } = await this.#limit(
+      () => generateQuestions({
+        introspection,
+        complexity,
+        count: this.options.count,
+        prompt,
+        model: this.options.model
+      })
+    );
+    const pairs = await Promise.all(
+      questions.map(async (question) => {
+        const result = await this.#limit(async () => {
+          try {
+            return await toSql({
+              input: question,
+              adapter: this.adapter,
+              fragments: this.options.teachings ?? [],
+              model: this.options.model
+            });
+          } catch (error) {
+            if (UnanswerableSQLError.isInstance(error)) {
+              return {
+                attempts: 0,
+                sql: "",
+                errors: [
+                  `Cannot answer the question ${question} because ${error.message}`
+                ]
+              };
+            }
+            throw error;
+          }
+        });
+        return {
+          question,
+          sql: result.sql,
+          success: !result.errors || result.errors.length === 0
+        };
+      })
+    );
+    return pairs;
+  }
+};
 export {
   ALL_STYLES,
   BaseContextualExtractor,
@@ -2099,7 +1907,6 @@ export {
   WindowedContextExtractor,
   formatConversation,
   generatePersonas,
-  generateTeachings,
   getMessageText,
   resolveContext,
   styleInstructions,