@deepagents/text2sql 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +210 -65
- package/dist/index.js.map +4 -4
- package/dist/lib/agents/exceptions.d.ts +20 -0
- package/dist/lib/agents/exceptions.d.ts.map +1 -0
- package/dist/lib/agents/sql.agent.d.ts +0 -17
- package/dist/lib/agents/sql.agent.d.ts.map +1 -1
- package/dist/lib/synthesis/index.js +224 -79
- package/dist/lib/synthesis/index.js.map +4 -4
- package/dist/lib/synthesis/synthesizers/depth-evolver.d.ts.map +1 -1
- package/dist/lib/synthesis/synthesizers/schema-synthesizer.d.ts.map +1 -1
- package/package.json +4 -4
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
declare const sqlValidationMarker: unique symbol;
|
|
2
|
+
declare const unanswerableSqlMarker: unique symbol;
|
|
3
|
+
/**
|
|
4
|
+
* Error thrown when SQL validation fails.
|
|
5
|
+
*/
|
|
6
|
+
export declare class SQLValidationError extends Error {
|
|
7
|
+
[sqlValidationMarker]: true;
|
|
8
|
+
constructor(message: string);
|
|
9
|
+
static isInstance(error: unknown): error is SQLValidationError;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Error thrown when the question cannot be answered with the given schema.
|
|
13
|
+
*/
|
|
14
|
+
export declare class UnanswerableSQLError extends Error {
|
|
15
|
+
[unanswerableSqlMarker]: true;
|
|
16
|
+
constructor(message: string);
|
|
17
|
+
static isInstance(error: unknown): error is UnanswerableSQLError;
|
|
18
|
+
}
|
|
19
|
+
export {};
|
|
20
|
+
//# sourceMappingURL=exceptions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"exceptions.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/exceptions.ts"],"names":[],"mappings":"AAAA,QAAA,MAAM,mBAAmB,eAA+B,CAAC;AACzD,QAAA,MAAM,qBAAqB,eAAiC,CAAC;AAE7D;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IAC3C,CAAC,mBAAmB,CAAC,EAAE,IAAI,CAAC;gBAEhB,OAAO,EAAE,MAAM;IAM3B,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,kBAAkB;CAK/D;AAED;;GAEG;AACH,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,CAAC,qBAAqB,CAAC,EAAE,IAAI,CAAC;gBAElB,OAAO,EAAE,MAAM;IAM3B,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,oBAAoB;CAMjE"}
|
|
@@ -21,22 +21,5 @@ export interface ToSqlResult {
|
|
|
21
21
|
/** Validation errors encountered (if any retries occurred) */
|
|
22
22
|
errors?: string[];
|
|
23
23
|
}
|
|
24
|
-
declare const marker: unique symbol;
|
|
25
|
-
/**
|
|
26
|
-
* Error thrown when SQL validation fails.
|
|
27
|
-
*/
|
|
28
|
-
export declare class SQLValidationError extends Error {
|
|
29
|
-
[marker]: true;
|
|
30
|
-
constructor(message: string);
|
|
31
|
-
static isInstance(error: unknown): error is SQLValidationError;
|
|
32
|
-
}
|
|
33
|
-
/**
|
|
34
|
-
* Error thrown when the question cannot be answered with the given schema.
|
|
35
|
-
*/
|
|
36
|
-
export declare class UnanswerableSQLError extends Error {
|
|
37
|
-
constructor(message: string);
|
|
38
|
-
static isInstance(error: unknown): error is UnanswerableSQLError;
|
|
39
|
-
}
|
|
40
24
|
export declare function toSql(options: ToSqlOptions): Promise<ToSqlResult>;
|
|
41
|
-
export {};
|
|
42
25
|
//# sourceMappingURL=sql.agent.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sql.agent.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/sql.agent.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sql.agent.d.ts","sourceRoot":"","sources":["../../../src/lib/agents/sql.agent.ts"],"names":[],"mappings":"AAeA,OAAO,EAAE,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAEL,KAAK,eAAe,EAOrB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAGtD,MAAM,WAAW,YAAY;IAC3B,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,sCAAsC;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,0DAA0D;IAC1D,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,8BAA8B;IAC9B,KAAK,EAAE,UAAU,CAAC;IAClB,gEAAgE;IAChE,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,8BAA8B;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,8BAA8B;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,8DAA8D;IAC9D,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AA4GD,wBAAsB,KAAK,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAoJvE"}
|
|
@@ -714,6 +714,32 @@ var LastQueryExtractor = class extends BaseContextualExtractor {
|
|
|
714
714
|
// packages/text2sql/src/lib/synthesis/synthesizers/schema-synthesizer.ts
|
|
715
715
|
import pLimit from "p-limit";
|
|
716
716
|
|
|
717
|
+
// packages/text2sql/src/lib/agents/exceptions.ts
|
|
718
|
+
var sqlValidationMarker = Symbol("SQLValidationError");
|
|
719
|
+
var unanswerableSqlMarker = Symbol("UnanswerableSQLError");
|
|
720
|
+
var SQLValidationError = class _SQLValidationError extends Error {
|
|
721
|
+
[sqlValidationMarker];
|
|
722
|
+
constructor(message) {
|
|
723
|
+
super(message);
|
|
724
|
+
this.name = "SQLValidationError";
|
|
725
|
+
this[sqlValidationMarker] = true;
|
|
726
|
+
}
|
|
727
|
+
static isInstance(error) {
|
|
728
|
+
return error instanceof _SQLValidationError && error[sqlValidationMarker] === true;
|
|
729
|
+
}
|
|
730
|
+
};
|
|
731
|
+
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
732
|
+
[unanswerableSqlMarker];
|
|
733
|
+
constructor(message) {
|
|
734
|
+
super(message);
|
|
735
|
+
this.name = "UnanswerableSQLError";
|
|
736
|
+
this[unanswerableSqlMarker] = true;
|
|
737
|
+
}
|
|
738
|
+
static isInstance(error) {
|
|
739
|
+
return error instanceof _UnanswerableSQLError && error[unanswerableSqlMarker] === true;
|
|
740
|
+
}
|
|
741
|
+
};
|
|
742
|
+
|
|
717
743
|
// packages/text2sql/src/lib/agents/question.agent.ts
|
|
718
744
|
import { groq as groq4 } from "@ai-sdk/groq";
|
|
719
745
|
import dedent4 from "dedent";
|
|
@@ -838,42 +864,120 @@ import {
|
|
|
838
864
|
defaultSettingsMiddleware,
|
|
839
865
|
wrapLanguageModel
|
|
840
866
|
} from "ai";
|
|
867
|
+
import dedent5 from "dedent";
|
|
841
868
|
import pRetry from "p-retry";
|
|
842
869
|
import z5 from "zod";
|
|
843
870
|
import "@deepagents/agent";
|
|
844
871
|
import {
|
|
845
872
|
ContextEngine as ContextEngine5,
|
|
846
873
|
InMemoryContextStore as InMemoryContextStore5,
|
|
874
|
+
fragment as fragment5,
|
|
847
875
|
persona as persona5,
|
|
876
|
+
policy,
|
|
848
877
|
structuredOutput as structuredOutput5,
|
|
849
878
|
user as user5
|
|
850
879
|
} from "@deepagents/context";
|
|
851
880
|
var RETRY_TEMPERATURES = [0, 0.2, 0.3];
|
|
881
|
+
var SQL_AGENT_ROLE = "Expert SQL query generator.";
|
|
882
|
+
var SQL_AGENT_OBJECTIVE = "Generate precise SQL grounded in provided schema.";
|
|
883
|
+
var SQL_AGENT_POLICIES = [
|
|
884
|
+
fragment5(
|
|
885
|
+
"schema_mapping",
|
|
886
|
+
policy({
|
|
887
|
+
rule: "Translate natural language into precise SQL grounded in available schema entities."
|
|
888
|
+
})
|
|
889
|
+
// policy({
|
|
890
|
+
// rule: 'Before returning an error, perform a schema-grounded self-check: identify core intent, draft best-effort SQL, then verify it uses only existing tables/columns.',
|
|
891
|
+
// }),
|
|
892
|
+
// policy({
|
|
893
|
+
// rule: 'Return unanswerable only if that self-check confirms no valid SQL can express the required intent without inventing schema elements.',
|
|
894
|
+
// }),
|
|
895
|
+
// policy({
|
|
896
|
+
// rule: 'Prefer a best-effort valid SQL query when entities can be reasonably inferred from table or column names.',
|
|
897
|
+
// }),
|
|
898
|
+
// policy({
|
|
899
|
+
// rule: 'Use lexical normalization (singular/plural, paraphrases, role synonyms, and minor wording differences) to align question terms with schema names.',
|
|
900
|
+
// }),
|
|
901
|
+
// policy({
|
|
902
|
+
// rule: 'Decompose noun phrases into core entity and qualifiers, and map the core entity first.',
|
|
903
|
+
// }),
|
|
904
|
+
// policy({
|
|
905
|
+
// rule: 'Do not require every descriptive word to map to a separate schema field when the core entity match is unambiguous.',
|
|
906
|
+
// }),
|
|
907
|
+
// policy({
|
|
908
|
+
// rule: 'For phrases like "X of Y", treat Y as contextual (non-blocking) when Y has no mapped schema field and the question does not ask to filter/group/select by Y explicitly.',
|
|
909
|
+
// }),
|
|
910
|
+
// policy({
|
|
911
|
+
// rule: 'Treat unmatched qualifiers as blockers only when they are restrictive constraints (specific values, comparisons, or conditions that change row eligibility).',
|
|
912
|
+
// }),
|
|
913
|
+
// hint('Preserve schema spelling exactly, including typos in column names.'),
|
|
914
|
+
)
|
|
915
|
+
// fragment(
|
|
916
|
+
// 'unanswerable_gate',
|
|
917
|
+
// workflow({
|
|
918
|
+
// task: 'Unanswerable decision',
|
|
919
|
+
// steps: [
|
|
920
|
+
// 'Identify the core intent (metric/projection and required filters).',
|
|
921
|
+
// 'Attempt schema-grounded mapping for the core intent before considering error.',
|
|
922
|
+
// 'If a valid SELECT can answer the core intent without inventing schema entities, return SQL.',
|
|
923
|
+
// 'Return unanswerable only when required information cannot be mapped to any available table or column.',
|
|
924
|
+
// ],
|
|
925
|
+
// }),
|
|
926
|
+
// policy({
|
|
927
|
+
// rule: 'Do not reject a question as unanswerable when requested information can be derived by filtering, joining, grouping, counting, set operations, or sorting on available columns.',
|
|
928
|
+
// }),
|
|
929
|
+
// ),
|
|
930
|
+
// fragment(
|
|
931
|
+
// 'query_shape_preferences',
|
|
932
|
+
// hint(
|
|
933
|
+
// 'Prefer explicit INNER JOINs over LEFT JOINs unless the question requires unmatched rows.',
|
|
934
|
+
// ),
|
|
935
|
+
// hint(
|
|
936
|
+
// 'Prefer direct joins over dropping join constraints or using weaker alternatives.',
|
|
937
|
+
// ),
|
|
938
|
+
// hint('Use DISTINCT only when uniqueness is explicitly requested.'),
|
|
939
|
+
// hint(
|
|
940
|
+
// 'For superlatives over grouped entities (most/least/highest/lowest by group), prefer GROUP BY with ORDER BY aggregate and LIMIT 1.',
|
|
941
|
+
// ),
|
|
942
|
+
// hint(
|
|
943
|
+
// 'For average/count conditions per entity, prefer GROUP BY with HAVING aggregate predicates over row-level WHERE predicates.',
|
|
944
|
+
// ),
|
|
945
|
+
// hint(
|
|
946
|
+
// 'For "both" conditions across two criteria, prefer INTERSECT when selecting shared values.',
|
|
947
|
+
// ),
|
|
948
|
+
// hint(
|
|
949
|
+
// 'For "A or B" retrieval across criteria, prefer UNION when combining two qualifying sets.',
|
|
950
|
+
// ),
|
|
951
|
+
// hint(
|
|
952
|
+
// 'For "never" constraints against related records, prefer NOT IN or EXCEPT against the disqualifying set.',
|
|
953
|
+
// ),
|
|
954
|
+
// hint(
|
|
955
|
+
// 'Use equality predicates for exact values unless the question asks for pattern matching.',
|
|
956
|
+
// ),
|
|
957
|
+
// hint(
|
|
958
|
+
// 'Keep numeric literals unquoted when they are purely numeric tokens in the question.',
|
|
959
|
+
// ),
|
|
960
|
+
// ),
|
|
961
|
+
// fragment(
|
|
962
|
+
// 'sql_minimality',
|
|
963
|
+
// guardrail({
|
|
964
|
+
// rule: 'Never hallucinate tables or columns.',
|
|
965
|
+
// reason: 'Schema fidelity is required.',
|
|
966
|
+
// action: 'Use only available schema entities.',
|
|
967
|
+
// }),
|
|
968
|
+
// guardrail({
|
|
969
|
+
// rule: 'Prefer the minimal query over transformed expressions.',
|
|
970
|
+
// reason:
|
|
971
|
+
// 'Unnecessary transformations reduce correctness and add avoidable complexity.',
|
|
972
|
+
// action:
|
|
973
|
+
// 'Do not add date parsing, substring extraction, derived projections, or extra selected columns unless explicitly requested or required by schema mismatch.',
|
|
974
|
+
// }),
|
|
975
|
+
// ),
|
|
976
|
+
];
|
|
852
977
|
function extractSql(output) {
|
|
853
978
|
const match = output.match(/```sql\n?([\s\S]*?)```/);
|
|
854
979
|
return match ? match[1].trim() : output.trim();
|
|
855
980
|
}
|
|
856
|
-
var marker = Symbol("SQLValidationError");
|
|
857
|
-
var SQLValidationError = class _SQLValidationError extends Error {
|
|
858
|
-
[marker];
|
|
859
|
-
constructor(message) {
|
|
860
|
-
super(message);
|
|
861
|
-
this.name = "SQLValidationError";
|
|
862
|
-
this[marker] = true;
|
|
863
|
-
}
|
|
864
|
-
static isInstance(error) {
|
|
865
|
-
return error instanceof _SQLValidationError && error[marker] === true;
|
|
866
|
-
}
|
|
867
|
-
};
|
|
868
|
-
var UnanswerableSQLError = class _UnanswerableSQLError extends Error {
|
|
869
|
-
constructor(message) {
|
|
870
|
-
super(message);
|
|
871
|
-
this.name = "UnanswerableSQLError";
|
|
872
|
-
}
|
|
873
|
-
static isInstance(error) {
|
|
874
|
-
return error instanceof _UnanswerableSQLError;
|
|
875
|
-
}
|
|
876
|
-
};
|
|
877
981
|
async function toSql(options) {
|
|
878
982
|
const { maxRetries = 3 } = options;
|
|
879
983
|
return withRetry(
|
|
@@ -886,20 +990,38 @@ async function toSql(options) {
|
|
|
886
990
|
context.set(
|
|
887
991
|
persona5({
|
|
888
992
|
name: "Freya",
|
|
889
|
-
role:
|
|
890
|
-
objective:
|
|
993
|
+
role: SQL_AGENT_ROLE,
|
|
994
|
+
objective: SQL_AGENT_OBJECTIVE
|
|
995
|
+
// role: `You are a data science expert that provides well-reasoned and detailed responses.`,
|
|
996
|
+
// objective: `Your task is to understand the schema and generate a valid SQL query to answer the question. You first think about the reasoning process as an internal monologue and then provide the user with the answer.`,
|
|
891
997
|
}),
|
|
998
|
+
...SQL_AGENT_POLICIES,
|
|
892
999
|
...options.fragments
|
|
893
1000
|
);
|
|
894
1001
|
if (errors.length) {
|
|
1002
|
+
const lastError = errors.at(-1);
|
|
895
1003
|
context.set(
|
|
896
|
-
user5(
|
|
897
|
-
|
|
898
|
-
|
|
1004
|
+
user5(dedent5`
|
|
1005
|
+
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1006
|
+
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1007
|
+
|
|
1008
|
+
Question: ${options.input}
|
|
1009
|
+
`),
|
|
1010
|
+
UnanswerableSQLError.isInstance(lastError) ? user5(
|
|
1011
|
+
`<retry_instruction>Your previous response marked the task as unanswerable. Re-evaluate using best-effort schema mapping. If the core intent is answerable with existing tables/columns, return SQL. Return error only when required core intent cannot be mapped without inventing schema elements.</retry_instruction>`
|
|
1012
|
+
) : user5(
|
|
1013
|
+
`<validation_error>Your previous SQL query had the following error: ${lastError?.message}. Please fix the query.</validation_error>`
|
|
899
1014
|
)
|
|
900
1015
|
);
|
|
901
1016
|
} else {
|
|
902
|
-
context.set(
|
|
1017
|
+
context.set(
|
|
1018
|
+
user5(dedent5`
|
|
1019
|
+
Answer the following question with the SQL code. Use the piece of evidence and base your answer on the database schema.
|
|
1020
|
+
Given the question, the evidence and the database schema, return the SQL script that addresses the question.
|
|
1021
|
+
|
|
1022
|
+
Question: ${options.input}
|
|
1023
|
+
`)
|
|
1024
|
+
);
|
|
903
1025
|
}
|
|
904
1026
|
const temperature = RETRY_TEMPERATURES[attemptNumber - 1] ?? RETRY_TEMPERATURES[RETRY_TEMPERATURES.length - 1];
|
|
905
1027
|
const baseModel = options.model ?? groq5("openai/gpt-oss-20b");
|
|
@@ -925,19 +1047,45 @@ async function toSql(options) {
|
|
|
925
1047
|
})
|
|
926
1048
|
});
|
|
927
1049
|
const { result: output } = await sqlOutput.generate();
|
|
1050
|
+
const finalizeSql = async (rawSql) => {
|
|
1051
|
+
const sql = options.adapter.format(extractSql(rawSql));
|
|
1052
|
+
const validationError = await options.adapter.validate(sql);
|
|
1053
|
+
if (validationError) {
|
|
1054
|
+
throw new SQLValidationError(validationError);
|
|
1055
|
+
}
|
|
1056
|
+
return {
|
|
1057
|
+
attempts,
|
|
1058
|
+
sql,
|
|
1059
|
+
errors: errors.length ? errors.map(formatErrorMessage) : void 0
|
|
1060
|
+
};
|
|
1061
|
+
};
|
|
928
1062
|
if ("error" in output) {
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
1063
|
+
context.set(
|
|
1064
|
+
user5(
|
|
1065
|
+
"<best_effort_fallback>Do not return unanswerable. Produce the best valid SQL query that answers the core intent using only available schema entities.</best_effort_fallback>"
|
|
1066
|
+
)
|
|
1067
|
+
);
|
|
1068
|
+
const forcedSqlOutput = structuredOutput5({
|
|
1069
|
+
model,
|
|
1070
|
+
context,
|
|
1071
|
+
schema: z5.object({
|
|
1072
|
+
sql: z5.string().describe(
|
|
1073
|
+
"Best-effort SQL query that answers the core intent using only available schema entities."
|
|
1074
|
+
),
|
|
1075
|
+
reasoning: z5.string().describe("Reasoning steps for best-effort schema mapping.")
|
|
1076
|
+
})
|
|
1077
|
+
});
|
|
1078
|
+
try {
|
|
1079
|
+
const forced = await forcedSqlOutput.generate();
|
|
1080
|
+
return await finalizeSql(forced.sql);
|
|
1081
|
+
} catch (error) {
|
|
1082
|
+
if (SQLValidationError.isInstance(error) || APICallError.isInstance(error) || JSONParseError.isInstance(error) || TypeValidationError.isInstance(error) || NoObjectGeneratedError.isInstance(error) || NoOutputGeneratedError.isInstance(error) || NoContentGeneratedError.isInstance(error)) {
|
|
1083
|
+
throw error;
|
|
1084
|
+
}
|
|
1085
|
+
throw new UnanswerableSQLError(output.error);
|
|
1086
|
+
}
|
|
935
1087
|
}
|
|
936
|
-
return
|
|
937
|
-
attempts,
|
|
938
|
-
sql,
|
|
939
|
-
errors: errors.length ? errors.map(formatErrorMessage) : void 0
|
|
940
|
-
};
|
|
1088
|
+
return await finalizeSql(output.sql);
|
|
941
1089
|
},
|
|
942
1090
|
{ retries: maxRetries - 1 }
|
|
943
1091
|
);
|
|
@@ -1000,9 +1148,6 @@ async function withRetry(computation, options = { retries: 3 }) {
|
|
|
1000
1148
|
return APICallError.isInstance(context.error) || JSONParseError.isInstance(context.error) || TypeValidationError.isInstance(context.error) || NoObjectGeneratedError.isInstance(context.error) || NoOutputGeneratedError.isInstance(context.error) || NoContentGeneratedError.isInstance(context.error);
|
|
1001
1149
|
},
|
|
1002
1150
|
onFailedAttempt(context) {
|
|
1003
|
-
console.log(
|
|
1004
|
-
`Attempt ${context.attemptNumber} failed. There are ${context.retriesLeft} retries left.`
|
|
1005
|
-
);
|
|
1006
1151
|
errors.push(context.error);
|
|
1007
1152
|
}
|
|
1008
1153
|
}
|
|
@@ -1104,14 +1249,14 @@ Generate ${this.options.count} questions at ${complexity} complexity.` : void 0;
|
|
|
1104
1249
|
|
|
1105
1250
|
// packages/text2sql/src/lib/synthesis/synthesizers/breadth-evolver.ts
|
|
1106
1251
|
import { groq as groq6 } from "@ai-sdk/groq";
|
|
1107
|
-
import
|
|
1252
|
+
import dedent6 from "dedent";
|
|
1108
1253
|
import pLimit2 from "p-limit";
|
|
1109
1254
|
import z6 from "zod";
|
|
1110
1255
|
import "@deepagents/agent";
|
|
1111
1256
|
import {
|
|
1112
1257
|
ContextEngine as ContextEngine6,
|
|
1113
1258
|
InMemoryContextStore as InMemoryContextStore6,
|
|
1114
|
-
fragment as
|
|
1259
|
+
fragment as fragment6,
|
|
1115
1260
|
guardrail as guardrail2,
|
|
1116
1261
|
persona as personaFragment,
|
|
1117
1262
|
structuredOutput as structuredOutput6,
|
|
@@ -1163,7 +1308,7 @@ async function paraphraseQuestion(params) {
|
|
|
1163
1308
|
chatId: `paraphraser-${crypto.randomUUID()}`,
|
|
1164
1309
|
userId: "system"
|
|
1165
1310
|
});
|
|
1166
|
-
const personaInstruction = params.persona ?
|
|
1311
|
+
const personaInstruction = params.persona ? dedent6`
|
|
1167
1312
|
<persona role="${params.persona.role}">
|
|
1168
1313
|
${params.persona.perspective}
|
|
1169
1314
|
|
|
@@ -1171,7 +1316,7 @@ async function paraphraseQuestion(params) {
|
|
|
1171
1316
|
Use their vocabulary, priorities, and framing style.
|
|
1172
1317
|
</persona>
|
|
1173
1318
|
` : "";
|
|
1174
|
-
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ?
|
|
1319
|
+
const styleInstruction = params.persona?.styles && params.persona.styles.length > 0 ? dedent6`
|
|
1175
1320
|
<communication_styles>
|
|
1176
1321
|
Generate paraphrases using these communication styles: ${params.persona.styles.join(", ")}
|
|
1177
1322
|
|
|
@@ -1187,17 +1332,17 @@ async function paraphraseQuestion(params) {
|
|
|
1187
1332
|
role: "You are a linguistic expert specializing in paraphrasing database questions. Your task is to generate alternative phrasings of questions that preserve the exact same semantic meaning - they must all produce the identical SQL query.",
|
|
1188
1333
|
objective: "Generate paraphrased versions of questions that preserve exact semantic meaning and produce identical SQL"
|
|
1189
1334
|
}),
|
|
1190
|
-
|
|
1191
|
-
|
|
1335
|
+
fragment6("original_question", params.question),
|
|
1336
|
+
fragment6(
|
|
1192
1337
|
"reference_sql",
|
|
1193
1338
|
params.sql,
|
|
1194
1339
|
"This SQL shows what the question is really asking - all paraphrases must ask for exactly this"
|
|
1195
1340
|
),
|
|
1196
|
-
...personaInstruction ? [
|
|
1197
|
-
...styleInstruction ? [
|
|
1198
|
-
|
|
1341
|
+
...personaInstruction ? [fragment6("persona", personaInstruction)] : [],
|
|
1342
|
+
...styleInstruction ? [fragment6("communication_styles", styleInstruction)] : [],
|
|
1343
|
+
fragment6(
|
|
1199
1344
|
"task",
|
|
1200
|
-
|
|
1345
|
+
dedent6`
|
|
1201
1346
|
Generate exactly ${params.count} paraphrased versions of the original question.
|
|
1202
1347
|
|
|
1203
1348
|
Requirements:
|
|
@@ -1274,7 +1419,7 @@ var BreadthEvolver = class extends PairProducer {
|
|
|
1274
1419
|
// packages/text2sql/src/lib/synthesis/synthesizers/depth-evolver.ts
|
|
1275
1420
|
import { groq as groq7 } from "@ai-sdk/groq";
|
|
1276
1421
|
import { NoObjectGeneratedError as NoObjectGeneratedError2, NoOutputGeneratedError as NoOutputGeneratedError2 } from "ai";
|
|
1277
|
-
import
|
|
1422
|
+
import dedent7 from "dedent";
|
|
1278
1423
|
import pLimit3 from "p-limit";
|
|
1279
1424
|
import pRetry2 from "p-retry";
|
|
1280
1425
|
import z7 from "zod";
|
|
@@ -1282,14 +1427,14 @@ import "@deepagents/agent";
|
|
|
1282
1427
|
import {
|
|
1283
1428
|
ContextEngine as ContextEngine7,
|
|
1284
1429
|
InMemoryContextStore as InMemoryContextStore7,
|
|
1285
|
-
fragment as
|
|
1430
|
+
fragment as fragment7,
|
|
1286
1431
|
guardrail as guardrail3,
|
|
1287
1432
|
persona as persona6,
|
|
1288
1433
|
structuredOutput as structuredOutput7,
|
|
1289
1434
|
user as user7
|
|
1290
1435
|
} from "@deepagents/context";
|
|
1291
1436
|
var techniqueInstructions = {
|
|
1292
|
-
"add-aggregation":
|
|
1437
|
+
"add-aggregation": dedent7`
|
|
1293
1438
|
Add aggregation requirements to the question.
|
|
1294
1439
|
Transform it to require GROUP BY, COUNT, SUM, AVG, MIN, MAX, or similar operations.
|
|
1295
1440
|
Examples:
|
|
@@ -1297,7 +1442,7 @@ var techniqueInstructions = {
|
|
|
1297
1442
|
- "List products" → "What is the average price per category?"
|
|
1298
1443
|
- "Get employees" → "How many employees are in each department?"
|
|
1299
1444
|
`,
|
|
1300
|
-
"add-filter":
|
|
1445
|
+
"add-filter": dedent7`
|
|
1301
1446
|
Add filtering conditions to the question.
|
|
1302
1447
|
Transform it to require WHERE clauses with specific conditions.
|
|
1303
1448
|
Examples:
|
|
@@ -1305,7 +1450,7 @@ var techniqueInstructions = {
|
|
|
1305
1450
|
- "List customers" → "List customers who have made more than 5 purchases"
|
|
1306
1451
|
- "Get products" → "Get products with price above $100"
|
|
1307
1452
|
`,
|
|
1308
|
-
"add-join":
|
|
1453
|
+
"add-join": dedent7`
|
|
1309
1454
|
Add requirements that need data from related tables.
|
|
1310
1455
|
Transform it to require JOIN operations between multiple tables.
|
|
1311
1456
|
Examples:
|
|
@@ -1313,7 +1458,7 @@ var techniqueInstructions = {
|
|
|
1313
1458
|
- "List products" → "List products with their supplier information"
|
|
1314
1459
|
- "Get employees" → "Get employees with their department and manager names"
|
|
1315
1460
|
`,
|
|
1316
|
-
"add-reasoning":
|
|
1461
|
+
"add-reasoning": dedent7`
|
|
1317
1462
|
Add multi-step reasoning requirements.
|
|
1318
1463
|
Transform it to require logical deduction, comparisons, or derived calculations.
|
|
1319
1464
|
Examples:
|
|
@@ -1321,7 +1466,7 @@ var techniqueInstructions = {
|
|
|
1321
1466
|
- "List products" → "Which products are underperforming compared to their category average?"
|
|
1322
1467
|
- "Get revenue" → "Which month had the highest growth compared to the previous month?"
|
|
1323
1468
|
`,
|
|
1324
|
-
hypothetical:
|
|
1469
|
+
hypothetical: dedent7`
|
|
1325
1470
|
Add a hypothetical or speculative scenario.
|
|
1326
1471
|
Transform it to require applying calculations or projections.
|
|
1327
1472
|
Examples:
|
|
@@ -1345,21 +1490,21 @@ async function evolveQuestion(params) {
|
|
|
1345
1490
|
role: "You are an expert at evolving simple database questions into more complex ones. Your task is to take a basic question and transform it into a more sophisticated version that requires advanced SQL techniques to answer.",
|
|
1346
1491
|
objective: "Transform simple questions into complex versions requiring advanced SQL techniques"
|
|
1347
1492
|
}),
|
|
1348
|
-
|
|
1349
|
-
|
|
1493
|
+
fragment7("original_question", params.question),
|
|
1494
|
+
fragment7(
|
|
1350
1495
|
"original_sql",
|
|
1351
1496
|
params.sql,
|
|
1352
1497
|
"(This shows what the original question required)"
|
|
1353
1498
|
),
|
|
1354
|
-
|
|
1355
|
-
|
|
1499
|
+
fragment7("database_schema", params.schema),
|
|
1500
|
+
fragment7(
|
|
1356
1501
|
"technique",
|
|
1357
1502
|
{ name: params.technique },
|
|
1358
1503
|
params.techniqueInstruction
|
|
1359
1504
|
),
|
|
1360
|
-
|
|
1505
|
+
fragment7(
|
|
1361
1506
|
"task",
|
|
1362
|
-
|
|
1507
|
+
dedent7`
|
|
1363
1508
|
Evolve the original question using the "${params.technique}" technique.
|
|
1364
1509
|
|
|
1365
1510
|
Requirements:
|
|
@@ -1502,14 +1647,14 @@ async function withRetry2(computation) {
|
|
|
1502
1647
|
|
|
1503
1648
|
// packages/text2sql/src/lib/synthesis/synthesizers/persona-generator.ts
|
|
1504
1649
|
import { groq as groq8 } from "@ai-sdk/groq";
|
|
1505
|
-
import
|
|
1650
|
+
import dedent8 from "dedent";
|
|
1506
1651
|
import z8 from "zod";
|
|
1507
1652
|
import "@deepagents/agent";
|
|
1508
1653
|
import {
|
|
1509
1654
|
ContextEngine as ContextEngine8,
|
|
1510
1655
|
InMemoryContextStore as InMemoryContextStore8,
|
|
1511
1656
|
XmlRenderer,
|
|
1512
|
-
fragment as
|
|
1657
|
+
fragment as fragment8,
|
|
1513
1658
|
guardrail as guardrail4,
|
|
1514
1659
|
persona as personaFragment2,
|
|
1515
1660
|
structuredOutput as structuredOutput8,
|
|
@@ -1542,10 +1687,10 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1542
1687
|
role: "You are an expert at understanding database schemas and inferring who would use them.",
|
|
1543
1688
|
objective: "Generate realistic personas representing users who would query this database"
|
|
1544
1689
|
}),
|
|
1545
|
-
|
|
1546
|
-
|
|
1690
|
+
fragment8("database_schema", schema),
|
|
1691
|
+
fragment8(
|
|
1547
1692
|
"task",
|
|
1548
|
-
|
|
1693
|
+
dedent8`
|
|
1549
1694
|
Analyze the database schema and generate realistic personas representing
|
|
1550
1695
|
the different types of users who would query this database.
|
|
1551
1696
|
|
|
@@ -1576,9 +1721,9 @@ async function generatePersonas(schemaFragments, options) {
|
|
|
1576
1721
|
- Styles should match how this persona would naturally communicate
|
|
1577
1722
|
`
|
|
1578
1723
|
),
|
|
1579
|
-
|
|
1724
|
+
fragment8(
|
|
1580
1725
|
"example",
|
|
1581
|
-
|
|
1726
|
+
dedent8`
|
|
1582
1727
|
For an e-commerce schema with orders, customers, products tables:
|
|
1583
1728
|
|
|
1584
1729
|
{
|
|
@@ -1621,7 +1766,7 @@ import { XmlRenderer as XmlRenderer2 } from "@deepagents/context";
|
|
|
1621
1766
|
|
|
1622
1767
|
// packages/text2sql/src/lib/agents/teachables.agent.ts
|
|
1623
1768
|
import { groq as groq9 } from "@ai-sdk/groq";
|
|
1624
|
-
import
|
|
1769
|
+
import dedent9 from "dedent";
|
|
1625
1770
|
import z9 from "zod";
|
|
1626
1771
|
import "@deepagents/agent";
|
|
1627
1772
|
import {
|
|
@@ -1631,7 +1776,7 @@ import {
|
|
|
1631
1776
|
clarification,
|
|
1632
1777
|
example,
|
|
1633
1778
|
explain,
|
|
1634
|
-
fragment as
|
|
1779
|
+
fragment as fragment9,
|
|
1635
1780
|
guardrail as guardrail5,
|
|
1636
1781
|
hint,
|
|
1637
1782
|
persona as persona7,
|
|
@@ -1705,11 +1850,11 @@ async function toTeachings(input, options) {
|
|
|
1705
1850
|
role: 'You design "fragments" for a Text2SQL system. Fragments become structured XML instructions.',
|
|
1706
1851
|
objective: "Choose only high-impact items that improve accuracy, safety, or clarity for this database"
|
|
1707
1852
|
}),
|
|
1708
|
-
|
|
1709
|
-
...input.context ? [
|
|
1710
|
-
|
|
1853
|
+
fragment9("database_schema", input.schema),
|
|
1854
|
+
...input.context ? [fragment9("additional_context", input.context)] : [],
|
|
1855
|
+
fragment9(
|
|
1711
1856
|
"output_structure",
|
|
1712
|
-
|
|
1857
|
+
dedent9`
|
|
1713
1858
|
Output a JSON object with these optional arrays (include only relevant ones):
|
|
1714
1859
|
- terms: [{ name: string, definition: string }] - Domain terminology
|
|
1715
1860
|
- hints: [{ text: string }] - Helpful SQL generation hints
|
|
@@ -1723,9 +1868,9 @@ async function toTeachings(input, options) {
|
|
|
1723
1868
|
- analogies: [{ concepts: string[], relationship: string, insight?: string, therefore?: string, pitfall?: string }]
|
|
1724
1869
|
`
|
|
1725
1870
|
),
|
|
1726
|
-
|
|
1871
|
+
fragment9(
|
|
1727
1872
|
"task",
|
|
1728
|
-
|
|
1873
|
+
dedent9`
|
|
1729
1874
|
1. Analyze the schema to infer domain, relationships, and sensitive columns.
|
|
1730
1875
|
2. Generate 3-10 fragments total across all categories, prioritizing:
|
|
1731
1876
|
- guardrails for PII columns (email, ssn, phone, etc)
|