@deepagents/text2sql 0.25.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +212 -285
- package/dist/index.js.map +4 -4
- package/dist/lib/adapters/adapter.d.ts +6 -0
- package/dist/lib/adapters/adapter.d.ts.map +1 -1
- package/dist/lib/adapters/bigquery/index.js +18 -1
- package/dist/lib/adapters/bigquery/index.js.map +2 -2
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts +2 -1
- package/dist/lib/adapters/groundings/abstract.grounding.d.ts.map +1 -1
- package/dist/lib/adapters/groundings/index.js.map +2 -2
- package/dist/lib/adapters/mysql/index.js +18 -1
- package/dist/lib/adapters/mysql/index.js.map +2 -2
- package/dist/lib/adapters/postgres/index.js +18 -1
- package/dist/lib/adapters/postgres/index.js.map +2 -2
- package/dist/lib/adapters/runtime-scope.d.ts +14 -0
- package/dist/lib/adapters/runtime-scope.d.ts.map +1 -0
- package/dist/lib/adapters/spreadsheet/index.js +18 -1
- package/dist/lib/adapters/spreadsheet/index.js.map +2 -2
- package/dist/lib/adapters/sqlite/index.js +18 -1
- package/dist/lib/adapters/sqlite/index.js.map +2 -2
- package/dist/lib/adapters/sqlserver/index.js +18 -1
- package/dist/lib/adapters/sqlserver/index.js.map +2 -2
- package/dist/lib/agents/exceptions.d.ts +22 -0
- package/dist/lib/agents/exceptions.d.ts.map +1 -1
- package/dist/lib/agents/result-tools.d.ts.map +1 -1
- package/dist/lib/fragments/schema.d.ts +2 -1
- package/dist/lib/fragments/schema.d.ts.map +1 -1
- package/dist/lib/instructions.d.ts +1 -9
- package/dist/lib/instructions.d.ts.map +1 -1
- package/dist/lib/sql.d.ts +0 -3
- package/dist/lib/sql.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +428 -621
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/index.d.ts +1 -2
- package/dist/lib/synthesis/synthesizers/index.d.ts.map +1 -1
- package/package.json +7 -6
- package/dist/lib/agents/developer.agent.d.ts +0 -41
- package/dist/lib/agents/developer.agent.d.ts.map +0 -1
- package/dist/lib/agents/teachables.agent.d.ts +0 -10
- package/dist/lib/agents/teachables.agent.d.ts.map +0 -1
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts +0 -20
- package/dist/lib/synthesis/synthesizers/teachings-generator.d.ts.map +0 -1
|
@@ -711,38 +711,10 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
|
711
711
|
}
|
|
712
712
|
};
|
|
713
713
|
|
|
714
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/
|
|
715
|
-
import pLimit from "p-limit";
|
|
716
|
-
|
|
717
|
-
// packages/text2sql/src/lib/agents/exceptions.ts
|
|
718
|
-
var sqlValidationMarker = Symbol("SQLValidationError");
|
|
719
|
-
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
720
|
-
var SQLValidationError = class _SQLValidationError extends Error {
|
|
721
|
-
[sqlValidationMarker];
|
|
722
|
-
constructor(message) {
|
|
723
|
-
super(message);
|
|
724
|
-
this.name = "SQLValidationError";
|
|
725
|
-
this[sqlValidationMarker] = true;
|
|
726
|
-
}
|
|
727
|
-
static isInstance(error) {
|
|
728
|
-
return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
|
|
729
|
-
}
|
|
730
|
-
};
|
|
731
|
-
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
732
|
-
[unanswerableSqlMarker];
|
|
733
|
-
constructor(message) {
|
|
734
|
-
super(message);
|
|
735
|
-
this.name = "UnanswerableSQLError";
|
|
736
|
-
this[unanswerableSqlMarker] = true;
|
|
737
|
-
}
|
|
738
|
-
static isInstance(error) {
|
|
739
|
-
return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
|
|
740
|
-
}
|
|
741
|
-
};
|
|
742
|
-
|
|
743
|
-
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
714
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
744
715
|
import { groq as groq4 } from "@ai-sdk/groq";
|
|
745
716
|
import dedent4 from "dedent";
|
|
717
|
+
import pLimit from "p-limit";
|
|
746
718
|
import z4 from "zod";
|
|
747
719
|
import "@deepagents/agent";
|
|
748
720
|
import {
|
|
@@ -750,107 +722,223 @@ import {
|
|
|
750
722
|
InMemoryContextStore as InMemoryContextStore4,
|
|
751
723
|
fragment as fragment4,
|
|
752
724
|
guardrail,
|
|
753
|
-
persona as
|
|
725
|
+
persona as personaFragment,
|
|
754
726
|
structuredOutput as structuredOutput4,
|
|
755
727
|
user as user4
|
|
756
728
|
} from "@deepagents/context";
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
"
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
- Moving averages
|
|
790
|
-
- PARTITION BY clauses
|
|
791
|
-
- Complex CTEs with multiple levels
|
|
792
|
-
Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
|
|
793
|
-
`
|
|
729
|
+
|
|
730
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
|
|
731
|
+
var ALL_STYLES = [
|
|
732
|
+
"formal",
|
|
733
|
+
// Professional business language
|
|
734
|
+
"colloquial",
|
|
735
|
+
// Casual everyday speech
|
|
736
|
+
"imperative",
|
|
737
|
+
// Commands: "Show me...", "Get..."
|
|
738
|
+
"interrogative",
|
|
739
|
+
// Questions: "What is...", "How many..."
|
|
740
|
+
"descriptive",
|
|
741
|
+
// Verbose, detailed
|
|
742
|
+
"concise",
|
|
743
|
+
// Brief, minimal
|
|
744
|
+
"vague",
|
|
745
|
+
// Ambiguous, hedging
|
|
746
|
+
"metaphorical",
|
|
747
|
+
// Figurative language
|
|
748
|
+
"conversational"
|
|
749
|
+
// Chat-like
|
|
750
|
+
];
|
|
751
|
+
var styleInstructions = {
|
|
752
|
+
formal: "Use professional business language, complete sentences, no slang",
|
|
753
|
+
colloquial: "Use casual everyday speech, contractions, informal tone",
|
|
754
|
+
imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
|
|
755
|
+
interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
|
|
756
|
+
descriptive: "Use detailed, verbose phrasing with extra context",
|
|
757
|
+
concise: "Use minimal words, telegram-style brevity",
|
|
758
|
+
vague: "Be intentionally ambiguous, use hedging language",
|
|
759
|
+
metaphorical: "Use figurative language, analogies, creative phrasing",
|
|
760
|
+
conversational: "Chat-like tone, as if talking to a colleague"
|
|
794
761
|
};
|
|
795
|
-
|
|
796
|
-
|
|
762
|
+
|
|
763
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
764
|
+
var paraphraserOutputSchema = z4.object({
|
|
765
|
+
paraphrases: z4.array(
|
|
766
|
+
z4.string().describe("A paraphrased version of the original question")
|
|
767
|
+
).min(1).describe("List of paraphrased questions that would produce the same SQL")
|
|
797
768
|
});
|
|
798
|
-
async function
|
|
799
|
-
const { introspection, complexity, count, prompt, model } = params;
|
|
769
|
+
async function paraphraseQuestion(params) {
|
|
800
770
|
const context = new ContextEngine4({
|
|
801
771
|
store: new InMemoryContextStore4(),
|
|
802
|
-
chatId: `
|
|
772
|
+
chatId: `paraphraser-${crypto.randomUUID()}`,
|
|
803
773
|
userId: "system"
|
|
804
774
|
});
|
|
775
|
+
const personaInstruction = params.persona ? dedent4`
|
|
776
|
+
<persona role="${params.persona.role}">
|
|
777
|
+
${params.persona.perspective}
|
|
778
|
+
|
|
779
|
+
Paraphrase the question as this persona would naturally ask it.
|
|
780
|
+
Use their vocabulary, priorities, and framing style.
|
|
781
|
+
</persona>
|
|
782
|
+
` : "";
|
|
783
|
+
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent4`
|
|
784
|
+
<communication_styles>
|
|
785
|
+
Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
|
|
786
|
+
|
|
787
|
+
Style definitions:
|
|
788
|
+
${params.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
|
|
789
|
+
|
|
790
|
+
Distribute paraphrases across these styles for variety.
|
|
791
|
+
</communication_styles>
|
|
792
|
+
` : "";
|
|
805
793
|
context.set(
|
|
806
|
-
|
|
807
|
-
name: "
|
|
808
|
-
role: "You are a
|
|
809
|
-
objective: "Generate
|
|
794
|
+
personaFragment({
|
|
795
|
+
name: "question_paraphraser",
|
|
796
|
+
role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
|
|
797
|
+
objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
|
|
810
798
|
}),
|
|
811
|
-
fragment4("
|
|
799
|
+
fragment4("original_question", params.question),
|
|
812
800
|
fragment4(
|
|
813
|
-
"
|
|
814
|
-
|
|
815
|
-
|
|
801
|
+
"reference_sql",
|
|
802
|
+
params.sql,
|
|
803
|
+
"This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
|
|
816
804
|
),
|
|
805
|
+
...personaInstruction ? [fragment4("persona", personaInstruction)] : [],
|
|
806
|
+
...styleInstruction ? [fragment4("communication_styles", styleInstruction)] : [],
|
|
817
807
|
fragment4(
|
|
818
808
|
"task",
|
|
819
809
|
dedent4`
|
|
820
|
-
Generate exactly ${count}
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
810
|
+
Generate exactly ${params.count} paraphrased versions of the original question.
|
|
811
|
+
|
|
812
|
+
Requirements:
|
|
813
|
+
1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
|
|
814
|
+
2. Vary the sentence structure, word choice, and phrasing style
|
|
815
|
+
3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
|
|
816
|
+
4. Keep paraphrases realistic - how actual users would ask
|
|
817
|
+
5. Do not add or remove any conditions, filters, or requirements from the original
|
|
818
|
+
${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
|
|
826
819
|
`
|
|
827
820
|
),
|
|
821
|
+
guardrail({ rule: "NEVER change what data is being requested" }),
|
|
828
822
|
guardrail({
|
|
829
|
-
rule: "
|
|
830
|
-
}),
|
|
831
|
-
guardrail({
|
|
832
|
-
rule: "Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema"
|
|
833
|
-
}),
|
|
834
|
-
guardrail({
|
|
835
|
-
rule: "DO NOT invent or assume tables/columns that are not explicitly shown in the schema"
|
|
823
|
+
rule: "NEVER add filters, aggregations, or conditions not in the original"
|
|
836
824
|
}),
|
|
837
825
|
guardrail({
|
|
838
|
-
rule: "
|
|
826
|
+
rule: "NEVER remove any specificity from the original question"
|
|
839
827
|
}),
|
|
840
828
|
guardrail({
|
|
841
|
-
rule: "All
|
|
829
|
+
rule: "All paraphrases must be answerable by the exact same SQL query"
|
|
842
830
|
}),
|
|
843
831
|
user4(
|
|
844
|
-
|
|
832
|
+
`Paraphrase this question ${params.count} times: "${params.question}"`
|
|
845
833
|
)
|
|
846
834
|
);
|
|
847
|
-
const
|
|
848
|
-
model: model ?? groq4("openai/gpt-oss-20b"),
|
|
835
|
+
const paraphraserOutput = structuredOutput4({
|
|
836
|
+
model: params.model ?? groq4("openai/gpt-oss-20b"),
|
|
849
837
|
context,
|
|
850
|
-
schema:
|
|
838
|
+
schema: paraphraserOutputSchema
|
|
851
839
|
});
|
|
852
|
-
return
|
|
840
|
+
return paraphraserOutput.generate();
|
|
853
841
|
}
|
|
842
|
+
var BreadthEvolver = class extends PairProducer {
|
|
843
|
+
/**
|
|
844
|
+
* @param source - Source pairs or producer to evolve
|
|
845
|
+
* @param options - Evolution options including count, persona, and concurrency
|
|
846
|
+
*/
|
|
847
|
+
constructor(source, options) {
|
|
848
|
+
super();
|
|
849
|
+
this.source = source;
|
|
850
|
+
this.options = options;
|
|
851
|
+
this.#limit = pLimit(this.options.concurrency ?? 4);
|
|
852
|
+
}
|
|
853
|
+
#limit;
|
|
854
|
+
/**
|
|
855
|
+
* Batch pairs within each chunk for concurrent processing.
|
|
856
|
+
* Uses pLimit for concurrency control, yields results per pair after chunk completes.
|
|
857
|
+
*/
|
|
858
|
+
async *produce() {
|
|
859
|
+
for await (const chunk of this.from(this.source)) {
|
|
860
|
+
const tasks = chunk.map(
|
|
861
|
+
(pair) => this.#limit(async () => {
|
|
862
|
+
const result = await paraphraseQuestion({
|
|
863
|
+
question: pair.question,
|
|
864
|
+
sql: pair.sql,
|
|
865
|
+
count: this.options.count,
|
|
866
|
+
persona: this.options.persona,
|
|
867
|
+
model: this.options.model
|
|
868
|
+
});
|
|
869
|
+
return result.paraphrases.map((paraphrase) => ({
|
|
870
|
+
question: paraphrase,
|
|
871
|
+
sql: pair.sql,
|
|
872
|
+
context: pair.context,
|
|
873
|
+
success: pair.success
|
|
874
|
+
}));
|
|
875
|
+
})
|
|
876
|
+
);
|
|
877
|
+
const results = await Promise.all(tasks);
|
|
878
|
+
yield results.flat();
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
};
|
|
882
|
+
|
|
883
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
884
|
+
import { groq as groq6 } from "@ai-sdk/groq";
|
|
885
|
+
import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
|
|
886
|
+
import dedent6 from "dedent";
|
|
887
|
+
import pLimit2 from "p-limit";
|
|
888
|
+
import pRetry2 from "p-retry";
|
|
889
|
+
import z6 from "zod";
|
|
890
|
+
import "@deepagents/agent";
|
|
891
|
+
import {
|
|
892
|
+
ContextEngine as ContextEngine6,
|
|
893
|
+
InMemoryContextStore as InMemoryContextStore6,
|
|
894
|
+
fragment as fragment6,
|
|
895
|
+
guardrail as guardrail3,
|
|
896
|
+
persona as persona5,
|
|
897
|
+
structuredOutput as structuredOutput6,
|
|
898
|
+
user as user6
|
|
899
|
+
} from "@deepagents/context";
|
|
900
|
+
|
|
901
|
+
// packages/text2sql/src/lib/agents/exceptions.ts
|
|
902
|
+
var sqlValidationMarker = Symbol("SQLValidationError");
|
|
903
|
+
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
904
|
+
var sqlScopeMarker = Symbol("SQLScopeError");
|
|
905
|
+
var SQLValidationError = class _SQLValidationError extends Error {
|
|
906
|
+
[sqlValidationMarker];
|
|
907
|
+
constructor(message) {
|
|
908
|
+
super(message);
|
|
909
|
+
this.name = "SQLValidationError";
|
|
910
|
+
this[sqlValidationMarker] = true;
|
|
911
|
+
}
|
|
912
|
+
static isInstance(error) {
|
|
913
|
+
return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
|
|
914
|
+
}
|
|
915
|
+
};
|
|
916
|
+
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
917
|
+
[unanswerableSqlMarker];
|
|
918
|
+
constructor(message) {
|
|
919
|
+
super(message);
|
|
920
|
+
this.name = "UnanswerableSQLError";
|
|
921
|
+
this[unanswerableSqlMarker] = true;
|
|
922
|
+
}
|
|
923
|
+
static isInstance(error) {
|
|
924
|
+
return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
|
|
925
|
+
}
|
|
926
|
+
};
|
|
927
|
+
var SQLScopeError = class _SQLScopeError extends Error {
|
|
928
|
+
[sqlScopeMarker];
|
|
929
|
+
payload;
|
|
930
|
+
errorType;
|
|
931
|
+
constructor(payload) {
|
|
932
|
+
super(JSON.stringify(payload));
|
|
933
|
+
this.name = "SQLScopeError";
|
|
934
|
+
this.payload = payload;
|
|
935
|
+
this.errorType = payload.error_type;
|
|
936
|
+
this[sqlScopeMarker] = true;
|
|
937
|
+
}
|
|
938
|
+
static isInstance(error) {
|
|
939
|
+
return error instanceof _SQLScopeError && error[sqlScopeMarker] === true;
|
|
940
|
+
}
|
|
941
|
+
};
|
|
854
942
|
|
|
855
943
|
// packages/text2sql/src/lib/agents/sql.agent.ts
|
|
856
944
|
import { groq as groq5 } from "@ai-sdk/groq";
|
|
@@ -875,7 +963,7 @@ import {
|
|
|
875
963
|
fragment as fragment5,
|
|
876
964
|
guardrail as guardrail2,
|
|
877
965
|
hint,
|
|
878
|
-
persona as
|
|
966
|
+
persona as persona4,
|
|
879
967
|
policy,
|
|
880
968
|
structuredOutput as structuredOutput5,
|
|
881
969
|
user as user5,
|
|
@@ -1099,7 +1187,7 @@ async function toSql(options) {
|
|
|
1099
1187
|
userId: "system"
|
|
1100
1188
|
});
|
|
1101
1189
|
context.set(
|
|
1102
|
-
|
|
1190
|
+
persona4({
|
|
1103
1191
|
name: "Freya",
|
|
1104
1192
|
role: SQL_AGENT_ROLE,
|
|
1105
1193
|
objective: SQL_AGENT_OBJECTIVE
|
|
@@ -1265,287 +1353,9 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
1265
1353
|
);
|
|
1266
1354
|
}
|
|
1267
1355
|
|
|
1268
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
1269
|
-
var SchemaSynthesizer = class extends PairProducer {
|
|
1270
|
-
/**
|
|
1271
|
-
* @param adapter - Database adapter for schema introspection and SQL validation
|
|
1272
|
-
* @param options - Synthesis configuration including count, complexity, and concurrency
|
|
1273
|
-
*/
|
|
1274
|
-
constructor(adapter, options) {
|
|
1275
|
-
super();
|
|
1276
|
-
this.adapter = adapter;
|
|
1277
|
-
this.options = options;
|
|
1278
|
-
this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "moderate"];
|
|
1279
|
-
this.#personas = this.options.personas ?? [void 0];
|
|
1280
|
-
this.#limit = pLimit(this.options.concurrency ?? 5);
|
|
1281
|
-
}
|
|
1282
|
-
#complexities = [];
|
|
1283
|
-
#personas = [];
|
|
1284
|
-
#limit;
|
|
1285
|
-
/**
|
|
1286
|
-
* Generates question-SQL pairs by iterating through all persona × complexity combinations.
|
|
1287
|
-
* Uses parallel processing bounded by the configured concurrency limit.
|
|
1288
|
-
* Yields results as each combination completes (streaming pattern).
|
|
1289
|
-
* @returns Generated pairs from all combinations
|
|
1290
|
-
*/
|
|
1291
|
-
async *produce() {
|
|
1292
|
-
const introspection = "";
|
|
1293
|
-
const combinations = this.#personas.flatMap(
|
|
1294
|
-
(persona8) => this.#complexities.map((complexity) => ({ persona: persona8, complexity }))
|
|
1295
|
-
);
|
|
1296
|
-
for (const { persona: persona8, complexity } of combinations) {
|
|
1297
|
-
const pairs = await this.#processCombination(
|
|
1298
|
-
introspection,
|
|
1299
|
-
persona8,
|
|
1300
|
-
complexity
|
|
1301
|
-
);
|
|
1302
|
-
if (pairs.length) {
|
|
1303
|
-
yield pairs;
|
|
1304
|
-
}
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1307
|
-
/**
|
|
1308
|
-
* Processes a single persona × complexity combination by generating questions
|
|
1309
|
-
* and converting each to SQL in parallel.
|
|
1310
|
-
*/
|
|
1311
|
-
async #processCombination(introspection, persona8, complexity) {
|
|
1312
|
-
const personaContext = persona8 ? `As ${persona8.role}, ${persona8.perspective}
|
|
1313
|
-
|
|
1314
|
-
Generate questions this persona would ask.` : void 0;
|
|
1315
|
-
const prompt = personaContext ? `${personaContext}
|
|
1316
|
-
|
|
1317
|
-
Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
1318
|
-
const { questions } = await this.#limit(
|
|
1319
|
-
() => generateQuestions({
|
|
1320
|
-
introspection,
|
|
1321
|
-
complexity,
|
|
1322
|
-
count: this.options.count,
|
|
1323
|
-
prompt,
|
|
1324
|
-
model: this.options.model
|
|
1325
|
-
})
|
|
1326
|
-
);
|
|
1327
|
-
const pairs = await Promise.all(
|
|
1328
|
-
questions.map(async (question) => {
|
|
1329
|
-
const result = await this.#limit(async () => {
|
|
1330
|
-
try {
|
|
1331
|
-
return await toSql({
|
|
1332
|
-
input: question,
|
|
1333
|
-
adapter: this.adapter,
|
|
1334
|
-
fragments: this.options.teachings ?? [],
|
|
1335
|
-
model: this.options.model
|
|
1336
|
-
});
|
|
1337
|
-
} catch (error) {
|
|
1338
|
-
if (UnanswerableSQLError.isInstance(error)) {
|
|
1339
|
-
return {
|
|
1340
|
-
attempts: 0,
|
|
1341
|
-
sql: "",
|
|
1342
|
-
errors: [
|
|
1343
|
-
`Cannot answer the question ${question} because ${error.message}`
|
|
1344
|
-
]
|
|
1345
|
-
};
|
|
1346
|
-
}
|
|
1347
|
-
throw error;
|
|
1348
|
-
}
|
|
1349
|
-
});
|
|
1350
|
-
return {
|
|
1351
|
-
question,
|
|
1352
|
-
sql: result.sql,
|
|
1353
|
-
success: !result.errors || result.errors.length === 0
|
|
1354
|
-
};
|
|
1355
|
-
})
|
|
1356
|
-
);
|
|
1357
|
-
return pairs;
|
|
1358
|
-
}
|
|
1359
|
-
};
|
|
1360
|
-
|
|
1361
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1362
|
-
import { groq as groq6 } from "@ai-sdk/groq";
|
|
1363
|
-
import dedent6 from "dedent";
|
|
1364
|
-
import pLimit2 from "p-limit";
|
|
1365
|
-
import z6 from "zod";
|
|
1366
|
-
import "@deepagents/agent";
|
|
1367
|
-
import {
|
|
1368
|
-
ContextEngine as ContextEngine6,
|
|
1369
|
-
InMemoryContextStore as InMemoryContextStore6,
|
|
1370
|
-
fragment as fragment6,
|
|
1371
|
-
guardrail as guardrail3,
|
|
1372
|
-
persona as personaFragment,
|
|
1373
|
-
structuredOutput as structuredOutput6,
|
|
1374
|
-
user as user6
|
|
1375
|
-
} from "@deepagents/context";
|
|
1376
|
-
|
|
1377
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/styles.ts
|
|
1378
|
-
var ALL_STYLES = [
|
|
1379
|
-
"formal",
|
|
1380
|
-
// Professional business language
|
|
1381
|
-
"colloquial",
|
|
1382
|
-
// Casual everyday speech
|
|
1383
|
-
"imperative",
|
|
1384
|
-
// Commands: "Show me...", "Get..."
|
|
1385
|
-
"interrogative",
|
|
1386
|
-
// Questions: "What is...", "How many..."
|
|
1387
|
-
"descriptive",
|
|
1388
|
-
// Verbose, detailed
|
|
1389
|
-
"concise",
|
|
1390
|
-
// Brief, minimal
|
|
1391
|
-
"vague",
|
|
1392
|
-
// Ambiguous, hedging
|
|
1393
|
-
"metaphorical",
|
|
1394
|
-
// Figurative language
|
|
1395
|
-
"conversational"
|
|
1396
|
-
// Chat-like
|
|
1397
|
-
];
|
|
1398
|
-
var styleInstructions = {
|
|
1399
|
-
formal: "Use professional business language, complete sentences, no slang",
|
|
1400
|
-
colloquial: "Use casual everyday speech, contractions, informal tone",
|
|
1401
|
-
imperative: 'Phrase as commands: "Show me...", "Get...", "List..."',
|
|
1402
|
-
interrogative: 'Phrase as questions: "What is...", "How many...", "Which..."',
|
|
1403
|
-
descriptive: "Use detailed, verbose phrasing with extra context",
|
|
1404
|
-
concise: "Use minimal words, telegram-style brevity",
|
|
1405
|
-
vague: "Be intentionally ambiguous, use hedging language",
|
|
1406
|
-
metaphorical: "Use figurative language, analogies, creative phrasing",
|
|
1407
|
-
conversational: "Chat-like tone, as if talking to a colleague"
|
|
1408
|
-
};
|
|
1409
|
-
|
|
1410
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1411
|
-
var paraphraserOutputSchema = z6.object({
|
|
1412
|
-
paraphrases: z6.array(
|
|
1413
|
-
z6.string().describe("A paraphrased version of the original question")
|
|
1414
|
-
).min(1).describe("List of paraphrased questions that would produce the same SQL")
|
|
1415
|
-
});
|
|
1416
|
-
async function paraphraseQuestion(params) {
|
|
1417
|
-
const context = new ContextEngine6({
|
|
1418
|
-
store: new InMemoryContextStore6(),
|
|
1419
|
-
chatId: `paraphraser-${crypto.randomUUID()}`,
|
|
1420
|
-
userId: "system"
|
|
1421
|
-
});
|
|
1422
|
-
const personaInstruction = params.persona ? dedent6`
|
|
1423
|
-
<persona role="${params.persona.role}">
|
|
1424
|
-
${params.persona.perspective}
|
|
1425
|
-
|
|
1426
|
-
Paraphrase the question as this persona would naturally ask it.
|
|
1427
|
-
Use their vocabulary, priorities, and framing style.
|
|
1428
|
-
</persona>
|
|
1429
|
-
` : "";
|
|
1430
|
-
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
|
|
1431
|
-
<communication_styles>
|
|
1432
|
-
Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
|
|
1433
|
-
|
|
1434
|
-
Style definitions:
|
|
1435
|
-
${params.persona.styles.map((s) => `- ${s}: ${styleInstructions[s]}`).join("\n")}
|
|
1436
|
-
|
|
1437
|
-
Distribute paraphrases across these styles for variety.
|
|
1438
|
-
</communication_styles>
|
|
1439
|
-
` : "";
|
|
1440
|
-
context.set(
|
|
1441
|
-
personaFragment({
|
|
1442
|
-
name: "question_paraphraser",
|
|
1443
|
-
role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
|
|
1444
|
-
objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
|
|
1445
|
-
}),
|
|
1446
|
-
fragment6("original_question", params.question),
|
|
1447
|
-
fragment6(
|
|
1448
|
-
"reference_sql",
|
|
1449
|
-
params.sql,
|
|
1450
|
-
"This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
|
|
1451
|
-
),
|
|
1452
|
-
...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
|
|
1453
|
-
...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
|
|
1454
|
-
fragment6(
|
|
1455
|
-
"task",
|
|
1456
|
-
dedent6`
|
|
1457
|
-
Generate exactly ${params.count} paraphrased versions of the original question.
|
|
1458
|
-
|
|
1459
|
-
Requirements:
|
|
1460
|
-
1. Each paraphrase must be semantically equivalent - it should produce the EXACT same SQL
|
|
1461
|
-
2. Vary the sentence structure, word choice, and phrasing style
|
|
1462
|
-
3. Use natural language without SQL keywords (SELECT, WHERE, JOIN, etc.)
|
|
1463
|
-
4. Keep paraphrases realistic - how actual users would ask
|
|
1464
|
-
5. Do not add or remove any conditions, filters, or requirements from the original
|
|
1465
|
-
${params.persona?.styles?.length ? "6. Apply the specified communication styles to create diverse phrasings" : ""}
|
|
1466
|
-
`
|
|
1467
|
-
),
|
|
1468
|
-
guardrail3({ rule: "NEVER change what data is being requested" }),
|
|
1469
|
-
guardrail3({
|
|
1470
|
-
rule: "NEVER add filters, aggregations, or conditions not in the original"
|
|
1471
|
-
}),
|
|
1472
|
-
guardrail3({
|
|
1473
|
-
rule: "NEVER remove any specificity from the original question"
|
|
1474
|
-
}),
|
|
1475
|
-
guardrail3({
|
|
1476
|
-
rule: "All paraphrases must be answerable by the exact same SQL query"
|
|
1477
|
-
}),
|
|
1478
|
-
user6(
|
|
1479
|
-
`Paraphrase this question ${params.count} times: "${params.question}"`
|
|
1480
|
-
)
|
|
1481
|
-
);
|
|
1482
|
-
const paraphraserOutput = structuredOutput6({
|
|
1483
|
-
model: params.model ?? groq6("openai/gpt-oss-20b"),
|
|
1484
|
-
context,
|
|
1485
|
-
schema: paraphraserOutputSchema
|
|
1486
|
-
});
|
|
1487
|
-
return paraphraserOutput.generate();
|
|
1488
|
-
}
|
|
1489
|
-
var BreadthEvolver = class extends PairProducer {
|
|
1490
|
-
/**
|
|
1491
|
-
* @param source - Source pairs or producer to evolve
|
|
1492
|
-
* @param options - Evolution options including count, persona, and concurrency
|
|
1493
|
-
*/
|
|
1494
|
-
constructor(source, options) {
|
|
1495
|
-
super();
|
|
1496
|
-
this.source = source;
|
|
1497
|
-
this.options = options;
|
|
1498
|
-
this.#limit = pLimit2(this.options.concurrency ?? 4);
|
|
1499
|
-
}
|
|
1500
|
-
#limit;
|
|
1501
|
-
/**
|
|
1502
|
-
* Batch pairs within each chunk for concurrent processing.
|
|
1503
|
-
* Uses pLimit for concurrency control, yields results per pair after chunk completes.
|
|
1504
|
-
*/
|
|
1505
|
-
async *produce() {
|
|
1506
|
-
for await (const chunk of this.from(this.source)) {
|
|
1507
|
-
const tasks = chunk.map(
|
|
1508
|
-
(pair) => this.#limit(async () => {
|
|
1509
|
-
const result = await paraphraseQuestion({
|
|
1510
|
-
question: pair.question,
|
|
1511
|
-
sql: pair.sql,
|
|
1512
|
-
count: this.options.count,
|
|
1513
|
-
persona: this.options.persona,
|
|
1514
|
-
model: this.options.model
|
|
1515
|
-
});
|
|
1516
|
-
return result.paraphrases.map((paraphrase) => ({
|
|
1517
|
-
question: paraphrase,
|
|
1518
|
-
sql: pair.sql,
|
|
1519
|
-
context: pair.context,
|
|
1520
|
-
success: pair.success
|
|
1521
|
-
}));
|
|
1522
|
-
})
|
|
1523
|
-
);
|
|
1524
|
-
const results = await Promise.all(tasks);
|
|
1525
|
-
yield results.flat();
|
|
1526
|
-
}
|
|
1527
|
-
}
|
|
1528
|
-
};
|
|
1529
|
-
|
|
1530
1356
|
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
1531
|
-
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1532
|
-
import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
|
|
1533
|
-
import dedent7 from "dedent";
|
|
1534
|
-
import pLimit3 from "p-limit";
|
|
1535
|
-
import pRetry2 from "p-retry";
|
|
1536
|
-
import z7 from "zod";
|
|
1537
|
-
import "@deepagents/agent";
|
|
1538
|
-
import {
|
|
1539
|
-
ContextEngine as ContextEngine7,
|
|
1540
|
-
InMemoryContextStore as InMemoryContextStore7,
|
|
1541
|
-
fragment as fragment7,
|
|
1542
|
-
guardrail as guardrail4,
|
|
1543
|
-
persona as persona6,
|
|
1544
|
-
structuredOutput as structuredOutput7,
|
|
1545
|
-
user as user7
|
|
1546
|
-
} from "@deepagents/context";
|
|
1547
1357
|
var techniqueInstructions = {
|
|
1548
|
-
"add-aggregation":
|
|
1358
|
+
"add-aggregation": dedent6`
|
|
1549
1359
|
Add aggregation requirements to the question.
|
|
1550
1360
|
Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
|
|
1551
1361
|
Examples:
|
|
@@ -1553,7 +1363,7 @@ var techniqueInstructions = {
|
|
|
1553
1363
|
- "List products" → "What is the average price per category?"
|
|
1554
1364
|
- "Get employees" → "How many employees are in each department?"
|
|
1555
1365
|
`,
|
|
1556
|
-
"add-filter":
|
|
1366
|
+
"add-filter": dedent6`
|
|
1557
1367
|
Add filtering conditions to the question.
|
|
1558
1368
|
Transform it to require WHERE clauses with specific conditions.
|
|
1559
1369
|
Examples:
|
|
@@ -1561,7 +1371,7 @@ var techniqueInstructions = {
|
|
|
1561
1371
|
- "List customers" → "List customers who have made more than 5 purchases"
|
|
1562
1372
|
- "Get products" → "Get products with price above $100"
|
|
1563
1373
|
`,
|
|
1564
|
-
"add-join":
|
|
1374
|
+
"add-join": dedent6`
|
|
1565
1375
|
Add requirements that need data from related tables.
|
|
1566
1376
|
Transform it to require JOIN operations between multiple tables.
|
|
1567
1377
|
Examples:
|
|
@@ -1569,7 +1379,7 @@ var techniqueInstructions = {
|
|
|
1569
1379
|
- "List products" → "List products with their supplier information"
|
|
1570
1380
|
- "Get employees" → "Get employees with their department and manager names"
|
|
1571
1381
|
`,
|
|
1572
|
-
"add-reasoning":
|
|
1382
|
+
"add-reasoning": dedent6`
|
|
1573
1383
|
Add multi-step reasoning requirements.
|
|
1574
1384
|
Transform it to require logical deduction, comparisons, or derived calculations.
|
|
1575
1385
|
Examples:
|
|
@@ -1577,7 +1387,7 @@ var techniqueInstructions = {
|
|
|
1577
1387
|
- "List products" → "Which products are underperforming compared to their category average?"
|
|
1578
1388
|
- "Get revenue" → "Which month had the highest growth compared to the previous month?"
|
|
1579
1389
|
`,
|
|
1580
|
-
hypothetical:
|
|
1390
|
+
hypothetical: dedent6`
|
|
1581
1391
|
Add a hypothetical or speculative scenario.
|
|
1582
1392
|
Transform it to require applying calculations or projections.
|
|
1583
1393
|
Examples:
|
|
@@ -1586,36 +1396,36 @@ var techniqueInstructions = {
|
|
|
1586
1396
|
- "Get costs" → "What would be the impact of a 10% discount on profit margins?"
|
|
1587
1397
|
`
|
|
1588
1398
|
};
|
|
1589
|
-
var evolverOutputSchema =
|
|
1590
|
-
evolvedQuestion:
|
|
1399
|
+
var evolverOutputSchema = z6.object({
|
|
1400
|
+
evolvedQuestion: z6.string().describe("The evolved, more complex version of the original question")
|
|
1591
1401
|
});
|
|
1592
1402
|
async function evolveQuestion(params) {
|
|
1593
|
-
const context = new
|
|
1594
|
-
store: new
|
|
1403
|
+
const context = new ContextEngine6({
|
|
1404
|
+
store: new InMemoryContextStore6(),
|
|
1595
1405
|
chatId: `evolver-${crypto.randomUUID()}`,
|
|
1596
1406
|
userId: "system"
|
|
1597
1407
|
});
|
|
1598
1408
|
context.set(
|
|
1599
|
-
|
|
1409
|
+
persona5({
|
|
1600
1410
|
name: "question_evolver",
|
|
1601
1411
|
role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
|
|
1602
1412
|
objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
|
|
1603
1413
|
}),
|
|
1604
|
-
|
|
1605
|
-
|
|
1414
|
+
fragment6("original_question", params.question),
|
|
1415
|
+
fragment6(
|
|
1606
1416
|
"original_sql",
|
|
1607
1417
|
params.sql,
|
|
1608
1418
|
"(This shows what the original question required)"
|
|
1609
1419
|
),
|
|
1610
|
-
|
|
1611
|
-
|
|
1420
|
+
fragment6("database_schema", params.schema),
|
|
1421
|
+
fragment6(
|
|
1612
1422
|
"technique",
|
|
1613
1423
|
{ name: params.technique },
|
|
1614
1424
|
params.techniqueInstruction
|
|
1615
1425
|
),
|
|
1616
|
-
|
|
1426
|
+
fragment6(
|
|
1617
1427
|
"task",
|
|
1618
|
-
|
|
1428
|
+
dedent6`
|
|
1619
1429
|
Evolve the original question using the "${params.technique}" technique.
|
|
1620
1430
|
|
|
1621
1431
|
Requirements:
|
|
@@ -1627,22 +1437,22 @@ async function evolveQuestion(params) {
|
|
|
1627
1437
|
6. The evolved question should build upon the original topic/domain
|
|
1628
1438
|
`
|
|
1629
1439
|
),
|
|
1630
|
-
|
|
1440
|
+
guardrail3({
|
|
1631
1441
|
rule: "The evolved question MUST require more complex SQL than the original"
|
|
1632
1442
|
}),
|
|
1633
|
-
|
|
1443
|
+
guardrail3({
|
|
1634
1444
|
rule: "Do not ask for data that does not exist in the schema"
|
|
1635
1445
|
}),
|
|
1636
|
-
|
|
1446
|
+
guardrail3({
|
|
1637
1447
|
rule: "Keep the question grounded in the same domain as the original"
|
|
1638
1448
|
}),
|
|
1639
|
-
|
|
1640
|
-
|
|
1449
|
+
guardrail3({ rule: "Make sure the question is clear and unambiguous" }),
|
|
1450
|
+
user6(
|
|
1641
1451
|
`Evolve this question using "${params.technique}": "${params.question}"`
|
|
1642
1452
|
)
|
|
1643
1453
|
);
|
|
1644
|
-
const evolverOutput =
|
|
1645
|
-
model: params.model ??
|
|
1454
|
+
const evolverOutput = structuredOutput6({
|
|
1455
|
+
model: params.model ?? groq6("openai/gpt-oss-20b"),
|
|
1646
1456
|
context,
|
|
1647
1457
|
schema: evolverOutputSchema
|
|
1648
1458
|
});
|
|
@@ -1666,7 +1476,7 @@ var DepthEvolver = class extends PairProducer {
|
|
|
1666
1476
|
this.source = source;
|
|
1667
1477
|
this.adapter = adapter;
|
|
1668
1478
|
this.options = options;
|
|
1669
|
-
this.#limit =
|
|
1479
|
+
this.#limit = pLimit2(this.options?.concurrency ?? 4);
|
|
1670
1480
|
}
|
|
1671
1481
|
#limit;
|
|
1672
1482
|
/**
|
|
@@ -1757,28 +1567,28 @@ async function withRetry2(computation) {
|
|
|
1757
1567
|
}
|
|
1758
1568
|
|
|
1759
1569
|
// packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
|
|
1760
|
-
import { groq as
|
|
1761
|
-
import
|
|
1762
|
-
import
|
|
1570
|
+
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1571
|
+
import dedent7 from "dedent";
|
|
1572
|
+
import z7 from "zod";
|
|
1763
1573
|
import "@deepagents/agent";
|
|
1764
1574
|
import {
|
|
1765
|
-
ContextEngine as
|
|
1766
|
-
InMemoryContextStore as
|
|
1575
|
+
ContextEngine as ContextEngine7,
|
|
1576
|
+
InMemoryContextStore as InMemoryContextStore7,
|
|
1767
1577
|
XmlRenderer,
|
|
1768
|
-
fragment as
|
|
1769
|
-
guardrail as
|
|
1578
|
+
fragment as fragment7,
|
|
1579
|
+
guardrail as guardrail4,
|
|
1770
1580
|
persona as personaFragment2,
|
|
1771
|
-
structuredOutput as
|
|
1772
|
-
user as
|
|
1581
|
+
structuredOutput as structuredOutput7,
|
|
1582
|
+
user as user7
|
|
1773
1583
|
} from "@deepagents/context";
|
|
1774
|
-
var
|
|
1775
|
-
personas:
|
|
1776
|
-
|
|
1777
|
-
role:
|
|
1778
|
-
perspective:
|
|
1584
|
+
var outputSchema2 = z7.object({
|
|
1585
|
+
personas: z7.array(
|
|
1586
|
+
z7.object({
|
|
1587
|
+
role: z7.string().describe("The job title or role of this persona"),
|
|
1588
|
+
perspective: z7.string().describe(
|
|
1779
1589
|
"Rich description of what this persona cares about when querying the database"
|
|
1780
1590
|
),
|
|
1781
|
-
styles:
|
|
1591
|
+
styles: z7.array(z7.enum(ALL_STYLES)).min(1).max(3).describe(
|
|
1782
1592
|
"Typical communication styles for this persona (1-3 styles)"
|
|
1783
1593
|
)
|
|
1784
1594
|
})
|
|
@@ -1787,8 +1597,8 @@ var outputSchema3 = z8.object({
|
|
|
1787
1597
|
async function generatePersonas(schemaFragments, options) {
|
|
1788
1598
|
const schema = new XmlRenderer().render(schemaFragments);
|
|
1789
1599
|
const count = options?.count ?? 5;
|
|
1790
|
-
const context = new
|
|
1791
|
-
store: new
|
|
1600
|
+
const context = new ContextEngine7({
|
|
1601
|
+
store: new InMemoryContextStore7(),
|
|
1792
1602
|
chatId: `persona-gen-${crypto.randomUUID()}`,
|
|
1793
1603
|
userId: "system"
|
|
1794
1604
|
});
|
|
@@ -1798,10 +1608,10 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1798
1608
|
role: "You are an expert at understanding database schemas and inferring who would use them.",
|
|
1799
1609
|
objective: "Generate realistic personas representing users who would query this database"
|
|
1800
1610
|
}),
|
|
1801
|
-
|
|
1802
|
-
|
|
1611
|
+
fragment7("database_schema", schema),
|
|
1612
|
+
fragment7(
|
|
1803
1613
|
"task",
|
|
1804
|
-
|
|
1614
|
+
dedent7`
|
|
1805
1615
|
Analyze the database schema and generate realistic personas representing
|
|
1806
1616
|
the different types of users who would query this database.
|
|
1807
1617
|
|
|
@@ -1832,9 +1642,9 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1832
1642
|
- Styles should match how this persona would naturally communicate
|
|
1833
1643
|
`
|
|
1834
1644
|
),
|
|
1835
|
-
|
|
1645
|
+
fragment7(
|
|
1836
1646
|
"example",
|
|
1837
|
-
|
|
1647
|
+
dedent7`
|
|
1838
1648
|
For an e-commerce schema with orders, customers, products tables:
|
|
1839
1649
|
|
|
1840
1650
|
{
|
|
@@ -1850,237 +1660,235 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1850
1660
|
}
|
|
1851
1661
|
`
|
|
1852
1662
|
),
|
|
1853
|
-
|
|
1663
|
+
guardrail4({
|
|
1854
1664
|
rule: "Only generate personas relevant to the actual schema provided"
|
|
1855
1665
|
}),
|
|
1856
|
-
|
|
1666
|
+
guardrail4({
|
|
1857
1667
|
rule: "Do not invent tables or data that do not exist in the schema"
|
|
1858
1668
|
}),
|
|
1859
|
-
|
|
1669
|
+
guardrail4({
|
|
1860
1670
|
rule: "Ensure perspectives are specific to the domain, not generic"
|
|
1861
1671
|
}),
|
|
1862
|
-
|
|
1672
|
+
user7(
|
|
1863
1673
|
`Generate exactly ${count} distinct personas who would query this database.`
|
|
1864
1674
|
)
|
|
1865
1675
|
);
|
|
1866
|
-
const personaOutput =
|
|
1867
|
-
model: options?.model ??
|
|
1676
|
+
const personaOutput = structuredOutput7({
|
|
1677
|
+
model: options?.model ?? groq7("openai/gpt-oss-20b"),
|
|
1868
1678
|
context,
|
|
1869
|
-
schema:
|
|
1679
|
+
schema: outputSchema2
|
|
1870
1680
|
});
|
|
1871
1681
|
const output = await personaOutput.generate();
|
|
1872
1682
|
return output.personas;
|
|
1873
1683
|
}
|
|
1874
1684
|
|
|
1875
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/
|
|
1876
|
-
import
|
|
1685
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
1686
|
+
import pLimit3 from "p-limit";
|
|
1877
1687
|
|
|
1878
|
-
// packages/text2sql/src/lib/agents/
|
|
1879
|
-
import { groq as
|
|
1880
|
-
import
|
|
1881
|
-
import
|
|
1688
|
+
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
1689
|
+
import { groq as groq8 } from "@ai-sdk/groq";
|
|
1690
|
+
import dedent8 from "dedent";
|
|
1691
|
+
import z8 from "zod";
|
|
1882
1692
|
import "@deepagents/agent";
|
|
1883
1693
|
import {
|
|
1884
|
-
ContextEngine as
|
|
1885
|
-
InMemoryContextStore as
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
guardrail as guardrail6,
|
|
1892
|
-
hint as hint2,
|
|
1893
|
-
persona as persona7,
|
|
1894
|
-
quirk,
|
|
1895
|
-
structuredOutput as structuredOutput9,
|
|
1896
|
-
styleGuide,
|
|
1897
|
-
term,
|
|
1898
|
-
user as user9,
|
|
1899
|
-
workflow as workflow2
|
|
1694
|
+
ContextEngine as ContextEngine8,
|
|
1695
|
+
InMemoryContextStore as InMemoryContextStore8,
|
|
1696
|
+
fragment as fragment8,
|
|
1697
|
+
guardrail as guardrail5,
|
|
1698
|
+
persona as persona6,
|
|
1699
|
+
structuredOutput as structuredOutput8,
|
|
1700
|
+
user as user8
|
|
1900
1701
|
} from "@deepagents/context";
|
|
1901
|
-
var
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
).optional().describe("SQL style preferences"),
|
|
1942
|
-
analogies: z9.array(
|
|
1943
|
-
z9.object({
|
|
1944
|
-
concepts: z9.array(z9.string()).min(2),
|
|
1945
|
-
relationship: z9.string(),
|
|
1946
|
-
insight: z9.string().optional(),
|
|
1947
|
-
therefore: z9.string().optional(),
|
|
1948
|
-
pitfall: z9.string().optional()
|
|
1949
|
-
})
|
|
1950
|
-
).optional().describe("Concept analogies")
|
|
1702
|
+
var complexityInstructions = {
|
|
1703
|
+
simple: dedent8`
|
|
1704
|
+
Generate simple questions that require:
|
|
1705
|
+
- Basic SELECT with single table
|
|
1706
|
+
- Simple WHERE clauses with one condition
|
|
1707
|
+
- COUNT(*) or basic aggregations
|
|
1708
|
+
- No joins required
|
|
1709
|
+
Examples: "How many customers do we have?", "List all products", "What is the total revenue?"
|
|
1710
|
+
`,
|
|
1711
|
+
moderate: dedent8`
|
|
1712
|
+
Generate moderate questions that require:
|
|
1713
|
+
- JOINs between 2-3 tables
|
|
1714
|
+
- Multiple WHERE conditions (AND/OR)
|
|
1715
|
+
- GROUP BY with HAVING clauses
|
|
1716
|
+
- ORDER BY with LIMIT
|
|
1717
|
+
- Basic subqueries
|
|
1718
|
+
Examples: "What are the top 5 customers by total orders?", "Which products have never been ordered?"
|
|
1719
|
+
`,
|
|
1720
|
+
complex: dedent8`
|
|
1721
|
+
Generate complex questions that require:
|
|
1722
|
+
- Multiple JOINs (3+ tables)
|
|
1723
|
+
- Nested subqueries or CTEs
|
|
1724
|
+
- Complex aggregations with multiple GROUP BY columns
|
|
1725
|
+
- CASE expressions
|
|
1726
|
+
- Date/time calculations
|
|
1727
|
+
Examples: "What is the month-over-month growth rate?", "Which customers have increased spending compared to last year?"
|
|
1728
|
+
`,
|
|
1729
|
+
"high complex": dedent8`
|
|
1730
|
+
Generate highly complex questions that require advanced SQL features:
|
|
1731
|
+
- Window functions (ROW_NUMBER, RANK, DENSE_RANK)
|
|
1732
|
+
- LAG, LEAD for comparisons
|
|
1733
|
+
- Running totals (SUM OVER)
|
|
1734
|
+
- Moving averages
|
|
1735
|
+
- PARTITION BY clauses
|
|
1736
|
+
- Complex CTEs with multiple levels
|
|
1737
|
+
Examples: "What is the running total of sales per month?", "Rank customers by their purchase frequency within each region"
|
|
1738
|
+
`
|
|
1739
|
+
};
|
|
1740
|
+
var outputSchema3 = z8.object({
|
|
1741
|
+
questions: z8.array(z8.string().describe("A natural language question about the data")).min(1).describe("List of natural language questions a user might ask")
|
|
1951
1742
|
});
|
|
1952
|
-
async function
|
|
1953
|
-
const
|
|
1954
|
-
|
|
1955
|
-
|
|
1743
|
+
async function generateQuestions(params) {
|
|
1744
|
+
const { introspection, complexity, count, prompt, model } = params;
|
|
1745
|
+
const context = new ContextEngine8({
|
|
1746
|
+
store: new InMemoryContextStore8(),
|
|
1747
|
+
chatId: `question-gen-${crypto.randomUUID()}`,
|
|
1956
1748
|
userId: "system"
|
|
1957
1749
|
});
|
|
1958
1750
|
context.set(
|
|
1959
|
-
|
|
1960
|
-
name: "
|
|
1961
|
-
role:
|
|
1962
|
-
objective: "
|
|
1751
|
+
persona6({
|
|
1752
|
+
name: "question_generator",
|
|
1753
|
+
role: "You are a synthetic data generator specializing in creating realistic natural language questions that users might ask about a database.",
|
|
1754
|
+
objective: "Generate diverse, realistic natural language questions that match the specified complexity level"
|
|
1963
1755
|
}),
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
Output a JSON object with these optional arrays (include only relevant ones):
|
|
1970
|
-
- terms: [{ name: string, definition: string }] - Domain terminology
|
|
1971
|
-
- hints: [{ text: string }] - Helpful SQL generation hints
|
|
1972
|
-
- guardrails: [{ rule: string, reason?: string, action?: string }] - Safety constraints
|
|
1973
|
-
- explains: [{ concept: string, explanation: string, therefore?: string }] - Concept explanations
|
|
1974
|
-
- examples: [{ question: string, answer: string, note?: string }] - Q&A examples
|
|
1975
|
-
- clarifications: [{ when: string, ask: string, reason: string }] - Clarification triggers
|
|
1976
|
-
- workflows: [{ task: string, steps: string[], triggers?: string[], notes?: string }] - Multi-step tasks
|
|
1977
|
-
- quirks: [{ issue: string, workaround: string }] - Known issues
|
|
1978
|
-
- styleGuides: [{ prefer: string, never?: string, always?: string }] - SQL style rules
|
|
1979
|
-
- analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
|
|
1980
|
-
`
|
|
1756
|
+
fragment8("database_schema", introspection || ""),
|
|
1757
|
+
fragment8(
|
|
1758
|
+
"complexity",
|
|
1759
|
+
{ level: complexity },
|
|
1760
|
+
complexityInstructions[complexity]
|
|
1981
1761
|
),
|
|
1982
|
-
|
|
1762
|
+
fragment8(
|
|
1983
1763
|
"task",
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
4. Only include categories that are relevant to this schema.
|
|
1764
|
+
dedent8`
|
|
1765
|
+
Generate exactly ${count} natural language questions at the "${complexity}" complexity level.
|
|
1766
|
+
The questions should:
|
|
1767
|
+
1. Match the complexity requirements above
|
|
1768
|
+
2. Use natural business language, not technical SQL terms
|
|
1769
|
+
3. Be realistic questions a non-technical user would actually ask
|
|
1770
|
+
4. Cover different tables and relationships when possible
|
|
1992
1771
|
`
|
|
1993
1772
|
),
|
|
1994
|
-
|
|
1995
|
-
|
|
1773
|
+
guardrail5({
|
|
1774
|
+
rule: "Questions MUST ONLY reference tables and columns that exist in the schema above"
|
|
1775
|
+
}),
|
|
1776
|
+
guardrail5({
|
|
1777
|
+
rule: "Before generating each question, verify that ALL entities (tables, columns, relationships) you reference are explicitly listed in the schema"
|
|
1778
|
+
}),
|
|
1779
|
+
guardrail5({
|
|
1780
|
+
rule: "DO NOT invent or assume tables/columns that are not explicitly shown in the schema"
|
|
1781
|
+
}),
|
|
1782
|
+
guardrail5({
|
|
1783
|
+
rule: "Use natural language without SQL keywords like SELECT, WHERE, etc."
|
|
1784
|
+
}),
|
|
1785
|
+
guardrail5({
|
|
1786
|
+
rule: "All questions must match the specified complexity level"
|
|
1787
|
+
}),
|
|
1788
|
+
user8(
|
|
1789
|
+
prompt ?? `Generate ${count} questions at ${complexity} complexity given db schema.`
|
|
1996
1790
|
)
|
|
1997
1791
|
);
|
|
1998
|
-
const
|
|
1999
|
-
model:
|
|
1792
|
+
const questionOutput = structuredOutput8({
|
|
1793
|
+
model: model ?? groq8("openai/gpt-oss-20b"),
|
|
2000
1794
|
context,
|
|
2001
|
-
schema:
|
|
1795
|
+
schema: outputSchema3
|
|
2002
1796
|
});
|
|
2003
|
-
|
|
2004
|
-
const fragments = [];
|
|
2005
|
-
result.terms?.forEach((t) => fragments.push(term(t.name, t.definition)));
|
|
2006
|
-
result.hints?.forEach((h) => fragments.push(hint2(h.text)));
|
|
2007
|
-
result.guardrails?.forEach(
|
|
2008
|
-
(g) => fragments.push(
|
|
2009
|
-
guardrail6({ rule: g.rule, reason: g.reason, action: g.action })
|
|
2010
|
-
)
|
|
2011
|
-
);
|
|
2012
|
-
result.explains?.forEach(
|
|
2013
|
-
(e) => fragments.push(
|
|
2014
|
-
explain({
|
|
2015
|
-
concept: e.concept,
|
|
2016
|
-
explanation: e.explanation,
|
|
2017
|
-
therefore: e.therefore
|
|
2018
|
-
})
|
|
2019
|
-
)
|
|
2020
|
-
);
|
|
2021
|
-
result.examples?.forEach(
|
|
2022
|
-
(e) => fragments.push(
|
|
2023
|
-
example2({ question: e.question, answer: e.answer, note: e.note })
|
|
2024
|
-
)
|
|
2025
|
-
);
|
|
2026
|
-
result.clarifications?.forEach(
|
|
2027
|
-
(c) => fragments.push(
|
|
2028
|
-
clarification({ when: c.when, ask: c.ask, reason: c.reason })
|
|
2029
|
-
)
|
|
2030
|
-
);
|
|
2031
|
-
result.workflows?.forEach(
|
|
2032
|
-
(w) => fragments.push(
|
|
2033
|
-
workflow2({
|
|
2034
|
-
task: w.task,
|
|
2035
|
-
steps: w.steps,
|
|
2036
|
-
triggers: w.triggers,
|
|
2037
|
-
notes: w.notes
|
|
2038
|
-
})
|
|
2039
|
-
)
|
|
2040
|
-
);
|
|
2041
|
-
result.quirks?.forEach(
|
|
2042
|
-
(q) => fragments.push(quirk({ issue: q.issue, workaround: q.workaround }))
|
|
2043
|
-
);
|
|
2044
|
-
result.styleGuides?.forEach(
|
|
2045
|
-
(s) => fragments.push(
|
|
2046
|
-
styleGuide({ prefer: s.prefer, never: s.never, always: s.always })
|
|
2047
|
-
)
|
|
2048
|
-
);
|
|
2049
|
-
result.analogies?.forEach(
|
|
2050
|
-
(a) => fragments.push(
|
|
2051
|
-
analogy({
|
|
2052
|
-
concepts: a.concepts,
|
|
2053
|
-
relationship: a.relationship,
|
|
2054
|
-
insight: a.insight,
|
|
2055
|
-
therefore: a.therefore,
|
|
2056
|
-
pitfall: a.pitfall
|
|
2057
|
-
})
|
|
2058
|
-
)
|
|
2059
|
-
);
|
|
2060
|
-
return fragments;
|
|
1797
|
+
return questionOutput.generate();
|
|
2061
1798
|
}
|
|
2062
1799
|
|
|
2063
|
-
// packages/text2sql/src/lib/synthesis/synthesizers/
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
1800
|
+
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
1801
|
+
var SchemaSynthesizer = class extends PairProducer {
|
|
1802
|
+
/**
|
|
1803
|
+
* @param adapter - Database adapter for schema introspection and SQL validation
|
|
1804
|
+
* @param options - Synthesis configuration including count, complexity, and concurrency
|
|
1805
|
+
*/
|
|
1806
|
+
constructor(adapter, options) {
|
|
1807
|
+
super();
|
|
1808
|
+
this.adapter = adapter;
|
|
1809
|
+
this.options = options;
|
|
1810
|
+
this.#complexities = Array.isArray(this.options.complexity) ? this.options.complexity : [this.options.complexity ?? "moderate"];
|
|
1811
|
+
this.#personas = this.options.personas ?? [void 0];
|
|
1812
|
+
this.#limit = pLimit3(this.options.concurrency ?? 5);
|
|
1813
|
+
}
|
|
1814
|
+
#complexities = [];
|
|
1815
|
+
#personas = [];
|
|
1816
|
+
#limit;
|
|
1817
|
+
/**
|
|
1818
|
+
* Generates question-SQL pairs by iterating through all persona × complexity combinations.
|
|
1819
|
+
* Uses parallel processing bounded by the configured concurrency limit.
|
|
1820
|
+
* Yields results as each combination completes (streaming pattern).
|
|
1821
|
+
* @returns Generated pairs from all combinations
|
|
1822
|
+
*/
|
|
1823
|
+
async *produce() {
|
|
1824
|
+
const introspection = "";
|
|
1825
|
+
const combinations = this.#personas.flatMap(
|
|
1826
|
+
(persona7) => this.#complexities.map((complexity) => ({ persona: persona7, complexity }))
|
|
1827
|
+
);
|
|
1828
|
+
for (const { persona: persona7, complexity } of combinations) {
|
|
1829
|
+
const pairs = await this.#processCombination(
|
|
1830
|
+
introspection,
|
|
1831
|
+
persona7,
|
|
1832
|
+
complexity
|
|
2073
1833
|
);
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
const isRetryable = lastError.message.includes("parse") || lastError.message.includes("schema") || lastError.message.includes("No object generated") || lastError.name.includes("AI_");
|
|
2077
|
-
if (!isRetryable) {
|
|
2078
|
-
throw lastError;
|
|
1834
|
+
if (pairs.length) {
|
|
1835
|
+
yield pairs;
|
|
2079
1836
|
}
|
|
2080
1837
|
}
|
|
2081
1838
|
}
|
|
2082
|
-
|
|
2083
|
-
|
|
1839
|
+
/**
|
|
1840
|
+
* Processes a single persona × complexity combination by generating questions
|
|
1841
|
+
* and converting each to SQL in parallel.
|
|
1842
|
+
*/
|
|
1843
|
+
async #processCombination(introspection, persona7, complexity) {
|
|
1844
|
+
const personaContext = persona7 ? `As ${persona7.role}, ${persona7.perspective}
|
|
1845
|
+
|
|
1846
|
+
Generate questions this persona would ask.` : void 0;
|
|
1847
|
+
const prompt = personaContext ? `${personaContext}
|
|
1848
|
+
|
|
1849
|
+
Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
1850
|
+
const { questions } = await this.#limit(
|
|
1851
|
+
() => generateQuestions({
|
|
1852
|
+
introspection,
|
|
1853
|
+
complexity,
|
|
1854
|
+
count: this.options.count,
|
|
1855
|
+
prompt,
|
|
1856
|
+
model: this.options.model
|
|
1857
|
+
})
|
|
1858
|
+
);
|
|
1859
|
+
const pairs = await Promise.all(
|
|
1860
|
+
questions.map(async (question) => {
|
|
1861
|
+
const result = await this.#limit(async () => {
|
|
1862
|
+
try {
|
|
1863
|
+
return await toSql({
|
|
1864
|
+
input: question,
|
|
1865
|
+
adapter: this.adapter,
|
|
1866
|
+
fragments: this.options.teachings ?? [],
|
|
1867
|
+
model: this.options.model
|
|
1868
|
+
});
|
|
1869
|
+
} catch (error) {
|
|
1870
|
+
if (UnanswerableSQLError.isInstance(error)) {
|
|
1871
|
+
return {
|
|
1872
|
+
attempts: 0,
|
|
1873
|
+
sql: "",
|
|
1874
|
+
errors: [
|
|
1875
|
+
`Cannot answer the question ${question} because ${error.message}`
|
|
1876
|
+
]
|
|
1877
|
+
};
|
|
1878
|
+
}
|
|
1879
|
+
throw error;
|
|
1880
|
+
}
|
|
1881
|
+
});
|
|
1882
|
+
return {
|
|
1883
|
+
question,
|
|
1884
|
+
sql: result.sql,
|
|
1885
|
+
success: !result.errors || result.errors.length === 0
|
|
1886
|
+
};
|
|
1887
|
+
})
|
|
1888
|
+
);
|
|
1889
|
+
return pairs;
|
|
1890
|
+
}
|
|
1891
|
+
};
|
|
2084
1892
|
export {
|
|
2085
1893
|
ALL_STYLES,
|
|
2086
1894
|
BaseContextualExtractor,
|
|
@@ -2099,7 +1907,6 @@ export {
|
|
|
2099
1907
|
WindowedContextExtractor,
|
|
2100
1908
|
formatConversation,
|
|
2101
1909
|
generatePersonas,
|
|
2102
|
-
generateTeachings,
|
|
2103
1910
|
getMessageText,
|
|
2104
1911
|
resolveContext,
|
|
2105
1912
|
styleInstructions,
|