@deepagents/text2sql 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1231 @@
1
+ // packages/text2sql/src/lib/adapters/groundings/context.ts
2
+ function createGroundingContext() {
3
+ return {
4
+ tables: [],
5
+ views: [],
6
+ relationships: [],
7
+ info: void 0
8
+ };
9
+ }
10
+
11
+ // packages/text2sql/src/lib/adapters/adapter.ts
12
+ var Adapter = class {
13
+ async introspect(ctx = createGroundingContext()) {
14
+ const lines = [];
15
+ for (const fn of this.grounding) {
16
+ const grounding = fn(this);
17
+ lines.push({
18
+ tag: grounding.tag,
19
+ fn: await grounding.execute(ctx)
20
+ });
21
+ }
22
+ return lines.map(({ fn, tag }) => {
23
+ const description = fn();
24
+ if (description === null) {
25
+ return "";
26
+ }
27
+ return `<${tag}>
28
+ ${description}
29
+ </${tag}>`;
30
+ }).join("\n");
31
+ }
32
+ /**
33
+ * Convert unknown database value to number.
34
+ * Handles number, bigint, and string types.
35
+ */
36
+ toNumber(value) {
37
+ if (typeof value === "number" && Number.isFinite(value)) {
38
+ return value;
39
+ }
40
+ if (typeof value === "bigint") {
41
+ return Number(value);
42
+ }
43
+ if (typeof value === "string" && value.trim() !== "") {
44
+ const parsed = Number(value);
45
+ return Number.isFinite(parsed) ? parsed : void 0;
46
+ }
47
+ return void 0;
48
+ }
49
+ /**
50
+ * Parse a potentially qualified table name into schema and table parts.
51
+ */
52
+ parseTableName(name) {
53
+ if (name.includes(".")) {
54
+ const [schema, ...rest] = name.split(".");
55
+ return { schema, table: rest.join(".") };
56
+ }
57
+ return { schema: this.defaultSchema ?? "", table: name };
58
+ }
59
+ /**
60
+ * Escape a string value for use in SQL string literals (single quotes).
61
+ * Used in WHERE clauses like: WHERE name = '${escapeString(value)}'
62
+ */
63
+ escapeString(value) {
64
+ return value.replace(/'/g, "''");
65
+ }
66
+ /**
67
+ * Build a SQL filter clause to include/exclude schemas.
68
+ * @param columnName - The schema column name (e.g., 'TABLE_SCHEMA')
69
+ * @param allowedSchemas - If provided, filter to these schemas only
70
+ */
71
+ buildSchemaFilter(columnName, allowedSchemas) {
72
+ if (allowedSchemas && allowedSchemas.length > 0) {
73
+ const values = allowedSchemas.map((s) => `'${this.escapeString(s)}'`).join(", ");
74
+ return `AND ${columnName} IN (${values})`;
75
+ }
76
+ if (this.systemSchemas.length > 0) {
77
+ const values = this.systemSchemas.map((s) => `'${this.escapeString(s)}'`).join(", ");
78
+ return `AND ${columnName} NOT IN (${values})`;
79
+ }
80
+ return "";
81
+ }
82
+ };
83
+
84
+ // packages/text2sql/src/lib/adapters/groundings/abstract.grounding.ts
85
+ var AbstractGrounding = class {
86
+ tag;
87
+ constructor(tag) {
88
+ this.tag = tag;
89
+ }
90
+ };
91
+
92
+ // packages/text2sql/src/lib/adapters/groundings/column-stats.grounding.ts
93
/**
 * Grounding that annotates columns with statistics.
 *
 * The actual statistics query lives in `collectStats(tableName, column)`,
 * which this class calls but does not define; dialect-specific subclasses are
 * expected to supply it.
 */
var ColumnStatsGrounding = class extends AbstractGrounding {
  /**
   * @param config - Accepted for symmetry with other groundings; not read here.
   */
  constructor(config = {}) {
    super("column_stats");
  }
  /**
   * Collect statistics for every column of every table and view in `ctx` and
   * store them on `column.stats`.
   *
   * Columns are processed one at a time. A failure on one column is logged
   * with `console.warn` and does not stop the remaining columns.
   *
   * @param ctx - Grounding context whose tables/views are annotated in place.
   * @returns A describe callback that always yields null (this grounding has
   *   no text output of its own).
   */
  async execute(ctx) {
    for (const container of ctx.tables.concat(ctx.views)) {
      for (const column of container.columns) {
        await this.#annotate(container, column);
      }
    }
    return () => null;
  }
  /** Attach stats to a single column, logging instead of throwing on error. */
  async #annotate(container, column) {
    try {
      const stats = await this.collectStats(container.name, column);
      if (stats) {
        column.stats = stats;
      }
    } catch (error) {
      console.warn(
        "Error collecting stats for",
        container.name,
        column.name,
        error
      );
    }
  }
};
126
+
127
+ // packages/text2sql/src/lib/adapters/groundings/info.grounding.ts
128
/**
 * Grounding that records database/dialect information.
 *
 * `collectInfo()` is called but not defined here; dialect-specific subclasses
 * are expected to supply it.
 */
var InfoGrounding = class extends AbstractGrounding {
  /**
   * @param config - Accepted for symmetry with other groundings; not read here.
   */
  constructor(config = {}) {
    super("dialect_info");
  }
  /**
   * Fetch database info, store it on `ctx.info`, and prepare a textual summary.
   *
   * @param ctx - Grounding context; its `info` property is overwritten.
   * @returns A describe callback yielding one line per available fact:
   *   dialect (or "unknown"), then version, database and JSON-encoded details
   *   when present.
   */
  async execute(ctx) {
    const info = await this.collectInfo();
    ctx.info = info;
    const summary = [];
    summary.push(`Dialect: ${info.dialect ?? "unknown"}`);
    if (info.version) {
      summary.push(`Version: ${info.version}`);
    }
    if (info.database) {
      summary.push(`Database: ${info.database}`);
    }
    // Skip the details line when the object is missing or has no keys.
    const hasDetails = info.details && Object.keys(info.details).length;
    if (hasDetails) {
      summary.push(`Details: ${JSON.stringify(info.details)}`);
    }
    return () => summary.join("\n");
  }
};
151
+
152
+ // packages/text2sql/src/lib/adapters/groundings/column-values.grounding.ts
153
/**
 * Grounding that annotates columns with their set of possible values.
 *
 * Values come from, in priority order: native ENUM types, enum-like CHECK
 * constraints, and a low-cardinality scan. The scan itself is performed by
 * `collectLowCardinality(tableName, column)`, which this class calls but does
 * not define; dialect-specific subclasses are expected to supply it.
 */
var ColumnValuesGrounding = class extends AbstractGrounding {
  /** Maximum number of distinct values for a column to count as low-cardinality. */
  lowCardinalityLimit;
  /**
   * @param config - Optional settings; `config.lowCardinalityLimit` defaults to 20.
   */
  constructor(config = {}) {
    super("column_values");
    this.lowCardinalityLimit = config.lowCardinalityLimit ?? 20;
  }
  /**
   * Get values for native ENUM type columns.
   * Return undefined if column is not an ENUM type.
   * Default implementation returns undefined (no native ENUM support).
   */
  async collectEnumValues(_tableName, _column) {
    return void 0;
  }
  /**
   * Parse CHECK constraint for enum-like IN clause.
   * Extracts values from patterns like:
   * - CHECK (status IN ('active', 'inactive'))
   * - CHECK ((status)::text = ANY (ARRAY['a'::text, 'b'::text]))
   * - CHECK (status = 'active' OR status = 'inactive')
   *
   * String literals may contain the SQL doubled-quote escape (`'it''s'`),
   * which is returned unescaped (`it's`).
   *
   * @param constraint - Constraint record with `type`, `definition` and
   *   optionally `columns`.
   * @param columnName - Column the values must belong to.
   * @returns The list of allowed values, or undefined when the constraint is
   *   not a CHECK on this column or matches none of the patterns.
   */
  parseCheckConstraint(constraint, columnName) {
    if (constraint.type !== "CHECK" || !constraint.definition) {
      return void 0;
    }
    if (constraint.columns && !constraint.columns.includes(columnName)) {
      return void 0;
    }
    const def = constraint.definition;
    const escapedCol = this.escapeRegex(columnName);
    // Column reference, optionally parenthesised and/or cast to a text type.
    const colPattern = `(?:\\(?\\(?${escapedCol}\\)?(?:::(?:text|varchar|character varying))?\\)?)`;
    const inMatch = def.match(
      new RegExp(`${colPattern}\\s+IN\\s*\\(([^)]+)\\)`, "i")
    );
    if (inMatch) {
      return this.extractStringValues(inMatch[1]);
    }
    const anyMatch = def.match(
      new RegExp(
        `${colPattern}\\s*=\\s*ANY\\s*\\(\\s*(?:ARRAY)?\\s*\\[([^\\]]+)\\]`,
        "i"
      )
    );
    if (anyMatch) {
      return this.extractStringValues(anyMatch[1]);
    }
    const orPattern = new RegExp(
      `\\b${escapedCol}\\b\\s*=\\s*'((?:[^']|'')*)'`,
      "gi"
    );
    const orMatches = [...def.matchAll(orPattern)];
    // A single equality is an ordinary condition, not an enumeration.
    if (orMatches.length >= 2) {
      return orMatches.map((m) => this.#unescapeLiteral(m[1]));
    }
    return void 0;
  }
  /**
   * Extract single-quoted string values from a comma-separated list.
   *
   * @param input - Text such as `'a', 'b'::text, 'it''s'`.
   * @returns The unescaped literal contents, or undefined when none are found.
   */
  extractStringValues(input) {
    const values = [];
    // The literal body is any run of non-quote characters or doubled quotes.
    for (const match of input.matchAll(/'((?:[^']|'')*)'/g)) {
      values.push(this.#unescapeLiteral(match[1]));
    }
    return values.length > 0 ? values : void 0;
  }
  /** Turn the body of a SQL string literal into its value (`''` becomes `'`). */
  #unescapeLiteral(body) {
    return body.replace(/''/g, "'");
  }
  /**
   * Escape special regex characters in a string.
   */
  escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }
  /**
   * Get the table from context by name.
   *
   * @returns The matching entry of `ctx.tables`, or undefined.
   */
  getTable(ctx, name) {
    return ctx.tables.find((t) => t.name === name);
  }
  /**
   * Execute the grounding process.
   * Annotates columns in ctx.tables and ctx.views with `kind` and `values`.
   *
   * Columns are processed one at a time. A failure on one column is logged
   * with `console.warn` and does not stop the remaining columns.
   *
   * @returns A describe callback that always yields null (this grounding has
   *   no text output of its own).
   */
  async execute(ctx) {
    const allContainers = [...ctx.tables, ...ctx.views];
    for (const container of allContainers) {
      // Constraints are looked up on ctx.tables only, by container name.
      const table = this.getTable(ctx, container.name);
      for (const column of container.columns) {
        try {
          const result = await this.resolveColumnValues(
            container.name,
            column,
            table?.constraints
          );
          if (result) {
            column.kind = result.kind;
            column.values = result.values;
          }
        } catch (error) {
          console.warn(
            "Error collecting column values for",
            container.name,
            column.name,
            error
          );
        }
      }
    }
    return () => null;
  }
  /**
   * Resolve column values from all sources in priority order: native enum,
   * CHECK constraints, then low-cardinality scan.
   *
   * @returns `{ kind, values }` with kind "Enum" or "LowCardinality", or
   *   undefined when no source yields values.
   */
  async resolveColumnValues(tableName, column, constraints) {
    const enumValues = await this.collectEnumValues(tableName, column);
    if (enumValues?.length) {
      return { kind: "Enum", values: enumValues };
    }
    for (const constraint of constraints ?? []) {
      const checkValues = this.parseCheckConstraint(constraint, column.name);
      if (checkValues?.length) {
        return { kind: "Enum", values: checkValues };
      }
    }
    const lowCardValues = await this.collectLowCardinality(tableName, column);
    if (lowCardValues?.length) {
      return { kind: "LowCardinality", values: lowCardValues };
    }
    return void 0;
  }
};
289
+
290
+ // packages/text2sql/src/lib/adapters/groundings/report.grounding.ts
291
+ import { groq } from "@ai-sdk/groq";
292
+ import { tool } from "ai";
293
+ import dedent from "dedent";
294
+ import z from "zod";
295
+ import {
296
+ agent,
297
+ generate,
298
+ toState,
299
+ user
300
+ } from "@deepagents/agent";
301
/**
 * Agent that explores a database through a single `query_database` tool and
 * writes a business-context report about it.
 *
 * NOTE(review): the prompt tells the model it has read-only access, but the
 * tool below forwards the SQL text to `adapter.execute` unchanged; nothing in
 * this block restricts it to SELECT statements.
 */
var reportAgent = agent({
  name: "db-report-agent",
  model: groq("openai/gpt-oss-20b"),
  prompt: () => dedent`
    <identity>
      You are a database analyst expert. Your job is to understand what
      a database represents and provide business context about it.
      You have READ-ONLY access to the database.
    </identity>

    <instructions>
      Write a business context that helps another agent answer questions accurately.

      For EACH table, do queries ONE AT A TIME:
      1. SELECT COUNT(*) to get row count
      2. SELECT * LIMIT 3 to see sample data

      Then write a report with:
      - What business this database is for
      - For each table: purpose, row count, and example of what the data looks like

      Include concrete examples like "Track prices are $0.99",
      "Customer names like 'Luís Gonçalves'", etc.

      Keep it 400-600 words, conversational style.
    </instructions>
  `,
  tools: {
    query_database: tool({
      description: "Execute a SELECT query to explore the database and gather insights.",
      inputSchema: z.object({
        sql: z.string().describe("The SELECT query to execute"),
        purpose: z.string().describe("What insight you are trying to gather with this query")
      }),
      // Runs the query on the adapter found in the agent state; `purpose` is
      // part of the schema but is not used when executing.
      execute: ({ sql }, options) => {
        const { adapter } = toState(options);
        return adapter.execute(sql);
      }
    })
  }
});
342
+
343
+ // packages/text2sql/src/lib/adapters/groundings/row-count.grounding.ts
344
/**
 * Grounding that annotates tables with row counts and a coarse size hint.
 *
 * The count query lives in `getRowCount(tableName)`, which this class calls
 * but does not define; dialect-specific subclasses are expected to supply it.
 */
var RowCountGrounding = class extends AbstractGrounding {
  /**
   * @param config - Accepted for symmetry with other groundings; not read here.
   */
  constructor(config = {}) {
    super("row_counts");
  }
  /**
   * Execute the grounding process.
   * Annotates tables in ctx.tables with `rowCount` and `sizeHint`.
   *
   * A failure while counting one table is logged with `console.warn` and the
   * remaining tables are still processed, matching how the column stats and
   * column values groundings treat per-item failures. Row counts are optional
   * annotations, so one bad table should not abort the whole introspection.
   *
   * @returns A describe callback that always yields null (this grounding has
   *   no text output of its own).
   */
  async execute(ctx) {
    for (const table of ctx.tables) {
      try {
        const count = await this.getRowCount(table.name);
        if (count != null) {
          table.rowCount = count;
          table.sizeHint = this.#classifyRowCount(count);
        }
      } catch (error) {
        console.warn("Error collecting row count for", table.name, error);
      }
    }
    return () => null;
  }
  /**
   * Classify row count into a size hint category.
   *
   * @returns "tiny" (<100), "small" (<1,000), "medium" (<10,000),
   *   "large" (<100,000) or "huge".
   */
  #classifyRowCount(count) {
    if (count < 100) return "tiny";
    if (count < 1e3) return "small";
    if (count < 1e4) return "medium";
    if (count < 1e5) return "large";
    return "huge";
  }
};
373
+
374
+ // packages/text2sql/src/lib/adapters/groundings/table.grounding.ts
375
+ import pluralize from "pluralize";
376
/**
 * Grounding that discovers tables (optionally following foreign keys) and
 * describes their schema.
 *
 * The database access is delegated to methods this class calls but does not
 * define: `getAllTableNames()`, `getTable(name)`, `findOutgoingRelations(name)`
 * and `findIncomingRelations(name)`. Dialect-specific subclasses are expected
 * to supply them.
 */
var TableGrounding = class extends AbstractGrounding {
  #filter;
  #forward;
  #backward;
  /**
   * @param config - Optional settings:
   *   - `filter`: explicit array of table names, a RegExp, or a predicate.
   *   - `forward`: follow outgoing foreign keys; `true` for unlimited depth or
   *     a number for a maximum depth.
   *   - `backward`: same, for incoming foreign keys.
   */
  constructor(config = {}) {
    super("tables");
    this.#filter = config.filter;
    this.#forward = config.forward;
    this.#backward = config.backward;
  }
  /**
   * Execute the grounding process.
   * Writes discovered tables and relationships to the context.
   *
   * Without `forward`/`backward` only the seed tables are loaded. Otherwise a
   * breadth-first walk over outgoing relations runs first, followed by one
   * over incoming relations; both share the same table and relationship
   * collections.
   *
   * @returns A describe callback rendering the discovered tables. It reads the
   *   table objects when invoked, so annotations added to them after this
   *   method returns are included.
   */
  async execute(ctx) {
    const seedTables = await this.applyFilter();
    const forward = this.#forward;
    const backward = this.#backward;
    if (!forward && !backward) {
      const seedList = await Promise.all(
        seedTables.map((name) => this.getTable(name))
      );
      ctx.tables.push(...seedList);
      return () => this.#describeTables(seedList);
    }
    // A Map (not a plain object) so table names such as "constructor" or
    // "toString" are not mistaken for already-loaded entries.
    const state = {
      tables: /* @__PURE__ */ new Map(),
      relationships: [],
      seen: /* @__PURE__ */ new Set()
    };
    if (forward) {
      await this.#walk(
        seedTables,
        this.#depthLimit(forward),
        (name) => this.findOutgoingRelations(name),
        (rel) => rel.referenced_table,
        state
      );
    }
    if (backward) {
      await this.#walk(
        seedTables,
        this.#depthLimit(backward),
        (name) => this.findIncomingRelations(name),
        (rel) => rel.table,
        state
      );
    }
    const tableList = [...state.tables.values()];
    ctx.tables.push(...tableList);
    ctx.relationships.push(...state.relationships);
    return () => this.#describeTables(tableList);
  }
  /** Translate a `forward`/`backward` setting into a maximum depth. */
  #depthLimit(setting) {
    return setting === true ? Infinity : setting || 0;
  }
  /**
   * Breadth-first walk over relations starting from the seed tables.
   *
   * Loads every visited table into `state.tables` and records every relation
   * encountered. Relations are only expanded for tables shallower than `limit`.
   */
  async #walk(seeds, limit, findRelations, nextTableOf, state) {
    const queue = seeds.map((name) => ({ name, depth: 0 }));
    const visited = /* @__PURE__ */ new Set();
    // Index cursor instead of shift(): the queue grows while it is consumed.
    for (let head = 0; head < queue.length; head++) {
      const { name, depth } = queue[head];
      if (visited.has(name)) continue;
      visited.add(name);
      if (!state.tables.get(name)) {
        state.tables.set(name, await this.getTable(name));
      }
      if (depth >= limit) continue;
      const rels = await findRelations(name);
      for (const rel of rels) {
        this.addRelationship(rel, state.relationships, state.seen);
        const next = nextTableOf(rel);
        if (!visited.has(next)) {
          queue.push({ name: next, depth: depth + 1 });
        }
      }
    }
  }
  /**
   * Apply the filter to get seed table names.
   * If filter is an explicit array, skip querying all table names.
   *
   * @returns The array itself, all table names when no filter is set, or the
   *   names accepted by the RegExp / predicate.
   */
  async applyFilter() {
    const filter = this.#filter;
    if (Array.isArray(filter)) {
      return filter;
    }
    const names = await this.getAllTableNames();
    if (!filter) {
      return names;
    }
    if (filter instanceof RegExp) {
      return names.filter((name) => filter.test(name));
    }
    return names.filter(filter);
  }
  /**
   * Add a relationship to the collection, deduplicating by key.
   *
   * @param rel - Relationship with `table`, `from`, `referenced_table`, `to`.
   * @param all - Array receiving relationships not seen before.
   * @param seen - Set of keys for relationships already added.
   */
  addRelationship(rel, all, seen) {
    const key = `${rel.table}:${rel.from.join(",")}:${rel.referenced_table}:${rel.to.join(",")}`;
    if (!seen.has(key)) {
      seen.add(key);
      all.push(rel);
    }
  }
  /** Render all tables, separated by blank lines. */
  #describeTables(tableList) {
    if (!tableList.length) {
      return "Schema unavailable.";
    }
    return tableList.map((table) => this.#describeTable(table)).join("\n\n");
  }
  /**
   * Render one table: header with optional row count, annotated columns, then
   * indexes, multi-column unique constraints and check constraints if any.
   */
  #describeTable(table) {
    const constraints = table.constraints ?? [];
    const ofType = (type) => constraints.filter((c) => c.type === type);
    const rowCountInfo = table.rowCount != null ? ` [rows: ${table.rowCount}${table.sizeHint ? `, size: ${table.sizeHint}` : ""}]` : "";
    const pkColumns = new Set(
      constraints.find((c) => c.type === "PRIMARY_KEY")?.columns ?? []
    );
    const notNullColumns = new Set(
      ofType("NOT_NULL").flatMap((c) => c.columns ?? [])
    );
    const defaultByColumn = /* @__PURE__ */ new Map();
    for (const c of ofType("DEFAULT")) {
      if (c.defaultValue == null) continue;
      for (const col of c.columns ?? []) {
        defaultByColumn.set(col, c.defaultValue);
      }
    }
    const uniques = ofType("UNIQUE");
    // Single-column uniques become column annotations; wider ones are listed
    // separately below.
    const uniqueColumns = new Set(
      uniques.filter((c) => c.columns?.length === 1).flatMap((c) => c.columns ?? [])
    );
    const fkByColumn = /* @__PURE__ */ new Map();
    for (const c of ofType("FOREIGN_KEY")) {
      const cols = c.columns ?? [];
      const refCols = c.referencedColumns ?? [];
      for (let i = 0; i < cols.length; i++) {
        // Fall back to the first referenced column, then to the same name.
        const refCol = refCols[i] ?? refCols[0] ?? cols[i];
        fkByColumn.set(cols[i], `${c.referencedTable}.${refCol}`);
      }
    }
    const columns = table.columns.map((column) => {
      const annotations = [];
      const isPrimaryKey = pkColumns.has(column.name);
      if (isPrimaryKey) {
        annotations.push("PK");
      }
      if (fkByColumn.has(column.name)) {
        annotations.push(`FK -> ${fkByColumn.get(column.name)}`);
      }
      if (uniqueColumns.has(column.name)) {
        annotations.push("UNIQUE");
      }
      if (notNullColumns.has(column.name)) {
        annotations.push("NOT NULL");
      }
      if (defaultByColumn.has(column.name)) {
        annotations.push(`DEFAULT: ${defaultByColumn.get(column.name)}`);
      }
      if (column.isIndexed && !isPrimaryKey) {
        annotations.push("Indexed");
      }
      if (column.kind === "Enum" && column.values?.length) {
        annotations.push(`Enum: ${column.values.join(", ")}`);
      } else if (column.kind === "LowCardinality" && column.values?.length) {
        annotations.push(`LowCardinality: ${column.values.join(", ")}`);
      }
      if (column.stats) {
        const statParts = [];
        if (column.stats.min != null || column.stats.max != null) {
          const minText = column.stats.min ?? "n/a";
          const maxText = column.stats.max ?? "n/a";
          statParts.push(`range ${minText} \u2192 ${maxText}`);
        }
        if (column.stats.nullFraction != null && Number.isFinite(column.stats.nullFraction)) {
          // Percentage rounded to one decimal place.
          const percent = Math.round(column.stats.nullFraction * 1e3) / 10;
          statParts.push(`null\u2248${percent}%`);
        }
        if (statParts.length) {
          annotations.push(statParts.join(", "));
        }
      }
      const annotationText = annotations.length ? ` [${annotations.join(", ")}]` : "";
      return `    - ${column.name} (${column.type})${annotationText}`;
    }).join("\n");
    const indexLines = (table.indexes ?? []).map((index) => {
      const props = [];
      if (index.unique) {
        props.push("UNIQUE");
      }
      if (index.type) {
        props.push(index.type);
      }
      const propsText = props.length ? ` (${props.join(", ")})` : "";
      const columnsText = index.columns?.length ? index.columns.join(", ") : "expression";
      return `    - ${index.name}${propsText}: ${columnsText}`;
    });
    const indexes = indexLines.length ? `\n  Indexes:\n${indexLines.join("\n")}` : "";
    const multiColumnUniques = uniques.filter((c) => (c.columns?.length ?? 0) > 1);
    const uniqueConstraints = multiColumnUniques.length ? `\n  Unique Constraints:\n${multiColumnUniques.map((c) => `    - ${c.name}: (${c.columns?.join(", ")})`).join("\n")}` : "";
    const checkConstraints = ofType("CHECK");
    const checks = checkConstraints.length ? `\n  Check Constraints:\n${checkConstraints.map((c) => `    - ${c.name}: ${c.definition}`).join("\n")}` : "";
    return `- Table: ${table.name}${rowCountInfo}\n  Columns:\n${columns}${indexes}${uniqueConstraints}${checks}`;
  }
  /**
   * Render relationships as `- table (cols) -> referenced (cols) [cardinality]`.
   *
   * Cardinality is a heuristic based on the ratio of the two tables' row
   * counts and stays "each" when counts are unavailable or inconclusive.
   *
   * NOTE(review): nothing in this class calls this renderer at present.
   */
  #describeRelationships = (tableList, relationships) => {
    if (!relationships.length) {
      return "None detected";
    }
    const tableMap = new Map(tableList.map((table) => [table.name, table]));
    return relationships.map((relationship) => {
      const sourceCount = tableMap.get(relationship.table)?.rowCount;
      const targetCount = tableMap.get(relationship.referenced_table)?.rowCount;
      const ratio = sourceCount != null && targetCount != null && targetCount > 0 ? sourceCount / targetCount : null;
      let cardinality = "each";
      if (ratio != null) {
        if (ratio > 5) {
          cardinality = `many-to-one (\u2248${sourceCount} vs ${targetCount})`;
        } else if (ratio < 1.2 && ratio > 0.8) {
          cardinality = `roughly 1:1 (${sourceCount} vs ${targetCount})`;
        } else if (ratio < 0.2) {
          cardinality = `one-to-many (${sourceCount} vs ${targetCount})`;
        }
      }
      return `- ${relationship.table} (${relationship.from.join(", ")}) -> ${relationship.referenced_table} (${relationship.to.join(", ")}) [${cardinality}]`;
    }).join("\n");
  };
};
636
+
637
+ // packages/text2sql/src/lib/adapters/sqlite/column-stats.sqlite.grounding.ts
638
/**
 * SQLite implementation of the column statistics grounding.
 *
 * Computes min, max and the fraction of NULLs for columns whose declared type
 * looks numeric, temporal or boolean.
 */
var SqliteColumnStatsGrounding = class extends ColumnStatsGrounding {
  #adapter;
  /**
   * @param adapter - Adapter used for `quoteIdentifier`, `runQuery` and `toNumber`.
   * @param config - Passed through to the base grounding.
   */
  constructor(adapter, config = {}) {
    super(config);
    this.#adapter = adapter;
  }
  /**
   * Query statistics for one column.
   *
   * @param tableName - Table or view containing the column.
   * @param column - Column record; `type` decides eligibility, `name` is queried.
   * @returns `{ min, max, nullFraction }` (each possibly undefined), or
   *   undefined when the type is not eligible, no row comes back, or every
   *   statistic is missing. `nullFraction` is clamped to the range 0..1.
   */
  async collectStats(tableName, column) {
    if (!this.#shouldCollectStats(column.type)) {
      return void 0;
    }
    const adapter = this.#adapter;
    const tableIdentifier = adapter.quoteIdentifier(tableName);
    const columnIdentifier = adapter.quoteIdentifier(column.name);
    const sql = `
      SELECT
        MIN(${columnIdentifier}) AS min_value,
        MAX(${columnIdentifier}) AS max_value,
        AVG(CASE WHEN ${columnIdentifier} IS NULL THEN 1.0 ELSE 0.0 END) AS null_fraction
      FROM ${tableIdentifier}
    `;
    const rows = await adapter.runQuery(sql);
    if (rows.length === 0) {
      return void 0;
    }
    const first = rows[0];
    const min = this.#normalizeValue(first?.min_value);
    const max = this.#normalizeValue(first?.max_value);
    const nullFraction = adapter.toNumber(first?.null_fraction);
    const nothingFound = min == null && max == null && nullFraction == null;
    if (nothingFound) {
      return void 0;
    }
    let clampedFraction = void 0;
    if (nullFraction != null && Number.isFinite(nullFraction)) {
      clampedFraction = Math.max(0, Math.min(1, nullFraction));
    }
    return {
      min: min ?? void 0,
      max: max ?? void 0,
      nullFraction: clampedFraction
    };
  }
  /**
   * Decide from the declared column type whether statistics are worth
   * collecting: the lower-cased type must contain one of the listed keywords.
   */
  #shouldCollectStats(type) {
    if (!type) {
      return false;
    }
    const eligible = /int|real|numeric|double|float|decimal|date|time|bool/;
    return eligible.test(type.toLowerCase());
  }
  /**
   * Convert a driver value to a string for display.
   *
   * @returns The string form for strings, numbers, bigints, booleans, Dates
   *   (ISO format) and Buffers (decoded as UTF-8); null for null/undefined and
   *   any other type.
   */
  #normalizeValue(value) {
    if (value === null || value === void 0) {
      return null;
    }
    switch (typeof value) {
      case "string":
        return value;
      case "number":
      case "bigint":
        return String(value);
      case "boolean":
        return value ? "true" : "false";
    }
    if (value instanceof Date) {
      return value.toISOString();
    }
    // Buffer only exists in Node-like runtimes, hence the typeof guard.
    if (typeof Buffer !== "undefined" && Buffer.isBuffer(value)) {
      return value.toString("utf-8");
    }
    return null;
  }
};
704
+
705
+ // packages/text2sql/src/lib/adapters/sqlite/info.sqlite.grounding.ts
706
/**
 * SQLite implementation of the info grounding.
 */
var SqliteInfoGrounding = class extends InfoGrounding {
  #adapter;
  /**
   * @param adapter - Adapter whose `runQuery` is used to read the version.
   * @param config - Passed through to the base grounding.
   */
  constructor(adapter, config = {}) {
    super(config);
    this.#adapter = adapter;
  }
  /**
   * Read the SQLite version and report dialect details.
   *
   * @returns `{ dialect: "sqlite", version, details }` where `version` comes
   *   from `sqlite_version()` (undefined if no row is returned) and `details`
   *   states that `?` is the parameter placeholder.
   */
  async collectInfo() {
    const [firstRow] = await this.#adapter.runQuery(
      "SELECT sqlite_version() AS version"
    );
    const details = { parameterPlaceholder: "?" };
    return {
      dialect: "sqlite",
      version: firstRow?.version,
      details
    };
  }
};
725
+
726
+ // packages/text2sql/src/lib/adapters/sqlite/column-values.sqlite.grounding.ts
727
/**
 * SQLite implementation of the column values grounding.
 *
 * Supplies the low-cardinality scan: a DISTINCT query limited to one more row
 * than the configured limit, so exceeding the limit can be detected cheaply.
 */
var SqliteColumnValuesGrounding = class extends ColumnValuesGrounding {
  #adapter;
  /**
   * @param adapter - Adapter used for `quoteIdentifier` and `runQuery`.
   * @param config - Passed through to the base grounding.
   */
  constructor(adapter, config = {}) {
    super(config);
    this.#adapter = adapter;
  }
  /**
   * List the distinct non-NULL values of a column when there are few of them.
   *
   * @param tableName - Table or view containing the column.
   * @param column - Column record; only `name` is used.
   * @returns The values as strings, or undefined when the column has no
   *   non-NULL values, has more distinct values than `lowCardinalityLimit`,
   *   or contains a value that cannot be rendered as a string.
   */
  async collectLowCardinality(tableName, column) {
    const adapter = this.#adapter;
    const tableIdentifier = adapter.quoteIdentifier(tableName);
    const columnIdentifier = adapter.quoteIdentifier(column.name);
    // Fetch one extra row: receiving it proves the limit was exceeded.
    const limit = this.lowCardinalityLimit + 1;
    const sql = `
      SELECT DISTINCT ${columnIdentifier} AS value
      FROM ${tableIdentifier}
      WHERE ${columnIdentifier} IS NOT NULL
      LIMIT ${limit}
    `;
    const rows = await adapter.runQuery(sql);
    const withinLimit = rows.length > 0 && rows.length <= this.lowCardinalityLimit;
    if (!withinLimit) {
      return void 0;
    }
    const values = [];
    for (const { value } of rows) {
      const formatted = this.#normalizeValue(value);
      if (formatted == null) {
        // One unrenderable value disqualifies the whole column.
        return void 0;
      }
      values.push(formatted);
    }
    return values.length ? values : void 0;
  }
  /**
   * Convert a driver value to a string for display.
   *
   * @returns The string form for strings, numbers, bigints, booleans, Dates
   *   (ISO format) and Buffers (decoded as UTF-8); null for null/undefined and
   *   any other type.
   */
  #normalizeValue(value) {
    if (value === null || value === void 0) {
      return null;
    }
    switch (typeof value) {
      case "string":
        return value;
      case "number":
      case "bigint":
        return String(value);
      case "boolean":
        return value ? "true" : "false";
    }
    if (value instanceof Date) {
      return value.toISOString();
    }
    // Buffer only exists in Node-like runtimes, hence the typeof guard.
    if (typeof Buffer !== "undefined" && Buffer.isBuffer(value)) {
      return value.toString("utf-8");
    }
    return null;
  }
};
779
+
780
+ // packages/text2sql/src/lib/adapters/sqlite/row-count.sqlite.grounding.ts
781
/**
 * SQLite implementation of the row count grounding.
 */
var SqliteRowCountGrounding = class extends RowCountGrounding {
  #adapter;
  /**
   * @param adapter - Adapter used for `quoteIdentifier`, `runQuery` and `toNumber`.
   * @param config - Passed through to the base grounding.
   */
  constructor(adapter, config = {}) {
    super(config);
    this.#adapter = adapter;
  }
  /**
   * Count the rows of a table with `SELECT COUNT(*)`.
   *
   * @param tableName - Table to count; quoted as an identifier.
   * @returns The count as a number, or undefined when the result cannot be
   *   converted by the adapter's `toNumber`.
   */
  async getRowCount(tableName) {
    const adapter = this.#adapter;
    const quotedTable = adapter.quoteIdentifier(tableName);
    const rows = await adapter.runQuery(
      `SELECT COUNT(*) as count FROM ${quotedTable}`
    );
    const firstRow = rows[0];
    return adapter.toNumber(firstRow?.count);
  }
};
794
+
795
+ // packages/text2sql/src/lib/adapters/sqlite/sqlite.ts
796
+ var SQL_ERROR_MAP = [
797
+ {
798
+ pattern: /^no such table: .+$/,
799
+ type: "MISSING_TABLE",
800
+ hint: "Check the database schema for the correct table name. The table you referenced does not exist."
801
+ },
802
+ {
803
+ pattern: /^no such column: .+$/,
804
+ type: "INVALID_COLUMN",
805
+ hint: "Check the table schema for correct column names. The column may not exist or is ambiguous (exists in multiple joined tables)."
806
+ },
807
+ {
808
+ pattern: /^ambiguous column name: .+$/,
809
+ type: "INVALID_COLUMN",
810
+ hint: "Check the table schema for correct column names. The column may not exist or is ambiguous (exists in multiple joined tables)."
811
+ },
812
+ {
813
+ pattern: /^near ".+": syntax error$/,
814
+ type: "SYNTAX_ERROR",
815
+ hint: "There is a SQL syntax error. Review the query structure, keywords, and punctuation."
816
+ },
817
+ {
818
+ pattern: /^no tables specified$/,
819
+ type: "SYNTAX_ERROR",
820
+ hint: "There is a SQL syntax error. Review the query structure, keywords, and punctuation."
821
+ },
822
+ {
823
+ pattern: /^attempt to write a readonly database$/,
824
+ type: "CONSTRAINT_ERROR",
825
+ hint: "A database constraint was violated. This should not happen with read-only queries."
826
+ }
827
+ ];
828
+ function formatError(sql, error) {
829
+ const errorMessage = error instanceof Error ? error.message : typeof error === "string" ? error : "Unknown error occurred";
830
+ const errorInfo = SQL_ERROR_MAP.find((it) => it.pattern.test(errorMessage));
831
+ if (!errorInfo) {
832
+ return {
833
+ error: errorMessage,
834
+ error_type: "UNKNOWN_ERROR",
835
+ suggestion: "Review the query and try again",
836
+ sql_attempted: sql
837
+ };
838
+ }
839
+ return {
840
+ error: errorMessage,
841
+ error_type: errorInfo.type,
842
+ suggestion: errorInfo.hint,
843
+ sql_attempted: sql
844
+ };
845
+ }
846
/**
 * SQLite adapter backed by a caller-supplied `execute` function.
 *
 * SQLite has no schema namespaces in the sense used by the base class, so
 * `defaultSchema` is unset and `systemSchemas` is empty.
 */
var Sqlite = class extends Adapter {
  #options;
  /** Grounding factories run by `introspect`; empty when none are configured. */
  grounding;
  defaultSchema = void 0;
  systemSchemas = [];
  /**
   * @param options - Adapter options:
   *   - `execute(sql)`: required; runs a statement and returns its result.
   *   - `validate(sql)`: optional custom validator.
   *   - `grounding`: optional list of grounding factories.
   * @throws Error when `options` is missing or `options.execute` is not a function.
   */
  constructor(options) {
    super();
    if (!options || typeof options.execute !== "function") {
      throw new Error("Sqlite adapter requires an execute function.");
    }
    this.#options = options;
    // Default to an empty list so introspect() yields "" instead of throwing
    // when no groundings were configured.
    this.grounding = options.grounding ?? [];
  }
  /**
   * Run a statement through the configured `execute` function.
   *
   * @returns Whatever the configured function returns (shape not normalised).
   */
  async execute(sql) {
    return this.#options.execute(sql);
  }
  /**
   * Check a statement without reporting failures as exceptions.
   *
   * Uses `options.validate` when provided; otherwise runs `EXPLAIN <sql>`.
   *
   * @returns The validator's result on success (undefined for the default
   *   validator), or a JSON string describing the error on failure.
   */
  async validate(sql) {
    const validator = this.#options.validate ?? (async (text) => {
      await this.#options.execute(`EXPLAIN ${text}`);
    });
    try {
      return await validator(sql);
    } catch (error) {
      return JSON.stringify(formatError(sql, error));
    }
  }
  /**
   * Run a statement and return its rows as an array.
   *
   * @returns The result itself when it is an array, or its `rows` array.
   * @throws Error when the result has neither shape.
   */
  async runQuery(sql) {
    const result = await this.#options.execute(sql);
    if (Array.isArray(result)) {
      return result;
    }
    if (result && typeof result === "object" && "rows" in result && Array.isArray(result.rows)) {
      return result.rows;
    }
    throw new Error(
      "Sqlite adapter execute() must return an array of rows or an object with a rows array when introspecting."
    );
  }
  /**
   * Quote a name as a SQL identifier: wrap in double quotes and double any
   * embedded double quote.
   */
  quoteIdentifier(name) {
    return `"${name.replace(/"/g, '""')}"`;
  }
  /**
   * Double every double quote in `value` without adding surrounding quotes.
   */
  escape(value) {
    return value.replace(/"/g, '""');
  }
  /**
   * Build a query returning up to `limit` rows of a table.
   *
   * @param tableName - Table to sample; quoted as an identifier.
   * @param columns - Columns to select; all columns when empty or omitted.
   * @param limit - Interpolated as-is into the LIMIT clause.
   */
  buildSampleRowsQuery(tableName, columns, limit) {
    const columnList = columns?.length ? columns.map((c) => this.quoteIdentifier(c)).join(", ") : "*";
    return `SELECT ${columnList} FROM ${this.quoteIdentifier(tableName)} LIMIT ${limit}`;
  }
};
898
+
899
+ // packages/text2sql/src/lib/adapters/sqlite/table.sqlite.grounding.ts
900
/**
 * SQLite implementation of the table grounding, built on `sqlite_master` and
 * the `table_info` / `foreign_key_list` pragmas.
 */
var SqliteTableGrounding = class extends TableGrounding {
  #adapter;
  /** All outgoing relations of all tables, loaded on first incoming lookup. */
  #relationshipCache = null;
  /**
   * @param adapter - Adapter whose `runQuery` executes the introspection SQL.
   * @param config - Passed through to the base grounding.
   */
  constructor(adapter, config = {}) {
    super(config);
    this.#adapter = adapter;
  }
  /**
   * List user table names in alphabetical order.
   *
   * @returns Names of type `table` from `sqlite_master`, skipping non-string
   *   values and names starting with `sqlite_`.
   */
  async getAllTableNames() {
    const rows = await this.#adapter.runQuery(`SELECT name FROM sqlite_master WHERE type='table' ORDER BY name`);
    const names = [];
    for (const row of rows) {
      const name = row.name;
      if (typeof name === "string" && !name.startsWith("sqlite_")) {
        names.push(name);
      }
    }
    return names;
  }
  /**
   * Load the column list of a table.
   *
   * @returns `{ name, rawName, columns }` where each column has `name` and
   *   `type`, with "unknown" substituted for missing values.
   */
  async getTable(tableName) {
    const quotedTable = this.#quoteIdentifier(tableName);
    const info = await this.#adapter.runQuery(
      `PRAGMA table_info(${quotedTable})`
    );
    const columns = info.map(({ name, type }) => ({
      name: name ?? "unknown",
      type: type ?? "unknown"
    }));
    return { name: tableName, rawName: tableName, columns };
  }
  /**
   * List the foreign keys declared on a table.
   *
   * Pragma rows sharing the same `id` belong to one (possibly multi-column)
   * foreign key and are merged into a single relation.
   *
   * NOTE(review): rows with a null `id`, `table`, `from` or `to` are dropped.
   * Presumably SQLite reports a null `to` for keys that reference the parent's
   * primary key implicitly, in which case those keys are skipped — confirm.
   *
   * @returns Relations of the form `{ table, from, referenced_table, to }`.
   */
  async findOutgoingRelations(tableName) {
    const quotedTable = this.#quoteIdentifier(tableName);
    const rows = await this.#adapter.runQuery(
      `PRAGMA foreign_key_list(${quotedTable})`
    );
    const byId = /* @__PURE__ */ new Map();
    for (const row of rows) {
      const incomplete = row.id == null || row.table == null || row.from == null || row.to == null;
      if (incomplete) {
        continue;
      }
      const id = Number(row.id);
      const fromColumn = String(row.from);
      const toColumn = String(row.to);
      const relation = byId.get(id);
      if (relation) {
        relation.from.push(fromColumn);
        relation.to.push(toColumn);
        continue;
      }
      byId.set(id, {
        table: tableName,
        from: [fromColumn],
        referenced_table: String(row.table),
        to: [toColumn]
      });
    }
    return [...byId.values()];
  }
  /**
   * List the foreign keys of other tables that point at `tableName`.
   *
   * The first call loads the outgoing relations of every table and caches
   * them; later calls only filter the cached list.
   */
  async findIncomingRelations(tableName) {
    if (!this.#relationshipCache) {
      this.#relationshipCache = await this.#loadAllRelationships();
    }
    const pointsHere = (relation) => relation.referenced_table === tableName;
    return this.#relationshipCache.filter(pointsHere);
  }
  /** Collect the outgoing relations of every table, one table at a time. */
  async #loadAllRelationships() {
    const collected = [];
    for (const name of await this.getAllTableNames()) {
      const outgoing = await this.findOutgoingRelations(name);
      collected.push(...outgoing);
    }
    return collected;
  }
  /** Wrap a name in single quotes, doubling embedded single quotes, for use as a pragma argument. */
  #quoteIdentifier(name) {
    const escaped = name.replace(/'/g, "''");
    return `'${escaped}'`;
  }
};
971
+
972
+ // packages/text2sql/src/lib/adapters/sqlite/index.ts
973
/**
 * Create a grounding factory for table introspection.
 *
 * @param config Configuration forwarded to `SqliteTableGrounding`.
 * @returns A function that builds the grounding for a given adapter.
 */
function tables(config = {}) {
  const createGrounding = (adapter) => new SqliteTableGrounding(adapter, config);
  return createGrounding;
}
976
/**
 * Create a grounding factory for database info.
 *
 * @param config Configuration forwarded to `SqliteInfoGrounding`.
 * @returns A function that builds the grounding for a given adapter.
 */
function info(config = {}) {
  const createGrounding = (adapter) => new SqliteInfoGrounding(adapter, config);
  return createGrounding;
}
979
/**
 * Create a grounding factory for column statistics.
 *
 * @param config Configuration forwarded to `SqliteColumnStatsGrounding`.
 * @returns A function that builds the grounding for a given adapter.
 */
function columnStats(config = {}) {
  const createGrounding = (adapter) => new SqliteColumnStatsGrounding(adapter, config);
  return createGrounding;
}
984
/**
 * Create a grounding factory for column values.
 *
 * @param config Configuration forwarded to `SqliteColumnValuesGrounding`.
 * @returns A function that builds the grounding for a given adapter.
 */
function columnValues(config = {}) {
  const createGrounding = (adapter) => new SqliteColumnValuesGrounding(adapter, config);
  return createGrounding;
}
989
/**
 * Create a grounding factory for row counts.
 *
 * @param config Configuration forwarded to `SqliteRowCountGrounding`.
 * @returns A function that builds the grounding for a given adapter.
 */
function rowCount(config = {}) {
  const createGrounding = (adapter) => new SqliteRowCountGrounding(adapter, config);
  return createGrounding;
}
994
+
995
+ // packages/text2sql/src/lib/adapters/spreadsheet/spreadsheet.ts
996
+ import { DatabaseSync } from "node:sqlite";
997
+
998
+ // packages/text2sql/src/lib/adapters/spreadsheet/parser.ts
999
+ import * as path from "node:path";
1000
+ import XLSX from "xlsx";
1001
/**
 * Read a spreadsheet file and turn each non-empty sheet into a table
 * description `{ name, columns, rows }`.
 *
 * For `.csv` / `.tsv` files the table name is derived from the file name;
 * for every other extension it is derived from the sheet name. Sheets with
 * no rows or no inferable columns are skipped.
 *
 * Table names are guaranteed to be unique within the returned list: distinct
 * sheet names can sanitize to the same identifier (e.g. "Q1 Sales" and
 * "Q1-Sales"), so later duplicates receive a numeric suffix (`_2`, `_3`, ...).
 *
 * @param filePath Path of the file handed to `XLSX.readFile`.
 * @returns The list of parsed sheets, in workbook order.
 * @throws Error when the file cannot be read, or when no sheet yields a table.
 */
function parseFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
  let workbook;
  try {
    workbook = XLSX.readFile(filePath, {
      // Parse dates as Date objects
      cellDates: true
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to read spreadsheet "${filePath}": ${message}`);
  }
  const isDelimitedText = ext === ".csv" || ext === ".tsv";
  const usedTableNames = /* @__PURE__ */ new Set();
  const sheets = [];
  for (const sheetName of workbook.SheetNames) {
    const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName]);
    if (rows.length === 0) {
      continue;
    }
    const columns = inferColumns(rows);
    if (columns.length === 0) {
      continue;
    }
    const baseName = isDelimitedText ? getTableNameFromFile(filePath) : sanitizeTableName(sheetName);
    // Sanitizing is lossy, so two sheets may map to the same identifier;
    // keep the first as-is and suffix the rest until the name is free.
    let tableName = baseName;
    for (let suffix = 2; usedTableNames.has(tableName); suffix++) {
      tableName = `${baseName}_${suffix}`;
    }
    usedTableNames.add(tableName);
    sheets.push({
      name: tableName,
      columns,
      rows
    });
  }
  if (sheets.length === 0) {
    throw new Error(
      `No valid sheets found in "${filePath}". All sheets are empty or have no columns.`
    );
  }
  return sheets;
}
1038
/**
 * Derive a table name from a file path: take the file name without its
 * extension and pass it through `sanitizeTableName`.
 *
 * @param filePath Path of the source file.
 * @returns The sanitized table name.
 */
function getTableNameFromFile(filePath) {
  const extension = path.extname(filePath);
  const stem = path.basename(filePath, extension);
  return sanitizeTableName(stem);
}
1042
/**
 * Turn arbitrary text into a lowercase identifier made of `a-z`, `0-9` and
 * `_` only.
 *
 * Every other character becomes `_`, runs of `_` collapse to one, and
 * leading/trailing `_` are trimmed. A result starting with a digit gets a
 * `_` prefix, an empty result becomes "column", and the final string is cut
 * to at most 64 characters.
 *
 * @param name Raw name (e.g. a sheet, file, or header name).
 * @returns The sanitized identifier.
 */
function sanitizeIdentifier(name) {
  const cleaned = name
    .toLowerCase()
    .replace(/[^a-z0-9_]/g, "_")
    .replace(/_+/g, "_")
    .replace(/^_+|_+$/g, "");
  if (cleaned === "") {
    return "column";
  }
  const startsWithDigit = /^[0-9]/.test(cleaned);
  const identifier = startsWithDigit ? "_" + cleaned : cleaned;
  return identifier.slice(0, 64);
}
/** Table names are sanitized with exactly the same rules as identifiers. */
var sanitizeTableName = sanitizeIdentifier;
1056
/**
 * Derive column descriptors from parsed rows.
 *
 * The column set is the union of keys across all rows, ordered by first
 * appearance. Each descriptor carries the de-duplicated column `name`, the
 * `originalKey` used to read the value from a row, and the inferred `type`.
 *
 * @param rows Row objects keyed by original header text.
 * @returns One descriptor per distinct key; empty when there are no rows or keys.
 */
function inferColumns(rows) {
  if (rows.length === 0) {
    return [];
  }
  // A Set keeps insertion order, so keys stay in first-seen order.
  const rawNames = [...new Set(rows.flatMap((row) => Object.keys(row)))];
  if (rawNames.length === 0) {
    return [];
  }
  const columns = [];
  for (const [idx, name] of deduplicateColumnNames(rawNames).entries()) {
    const originalKey = rawNames[idx];
    const type = inferColumnType(rows.map((row) => row[originalKey]));
    columns.push({ name, originalKey, type });
  }
  return columns;
}
1078
/**
 * Sanitize a list of raw column names and make the results unique.
 *
 * The first occurrence of a sanitized name is kept as-is; later occurrences
 * receive a numeric suffix starting at `_2`. The suffix is advanced until the
 * candidate is not already in use, so a generated name can never collide with
 * a name that was present in the input (e.g. `["a", "a_2", "a"]` yields
 * `["a", "a_2", "a_3"]`).
 *
 * @param names Raw column names.
 * @returns Unique sanitized names, same length and order as `names`.
 */
function deduplicateColumnNames(names) {
  // Next suffix to try per base name, so repeated bases do not rescan from 2.
  const nextSuffix = /* @__PURE__ */ new Map();
  // Every name emitted so far, original or generated.
  const taken = /* @__PURE__ */ new Set();
  const result = [];
  for (const rawName of names) {
    const base = sanitizeTableName(rawName) || "column";
    let candidate = base;
    if (taken.has(candidate)) {
      let suffix = nextSuffix.get(base) ?? 2;
      do {
        candidate = `${base}_${suffix}`;
        suffix++;
      } while (taken.has(candidate));
      nextSuffix.set(base, suffix);
    }
    taken.add(candidate);
    result.push(candidate);
  }
  return result;
}
1096
/**
 * Infer the column type for a list of cell values.
 *
 * Nullish values and empty strings are ignored. Any value that is neither a
 * number nor a boolean (strings, Dates, other objects) makes the column
 * "TEXT" immediately. Otherwise the column is "REAL" if at least one
 * non-integer number was seen, "INTEGER" if only integers/booleans were seen,
 * and "TEXT" when no usable value was found.
 *
 * @param values Cell values of one column.
 * @returns "INTEGER", "REAL" or "TEXT".
 */
function inferColumnType(values) {
  let sawInteger = false;
  let sawReal = false;
  for (const value of values) {
    if (value == null || value === "") {
      continue;
    }
    switch (typeof value) {
      case "number":
        if (Number.isInteger(value)) {
          sawInteger = true;
        } else {
          sawReal = true;
        }
        break;
      case "boolean":
        // Booleans count as integers.
        sawInteger = true;
        break;
      default:
        // Strings, Date instances and any other object force a text column.
        return "TEXT";
    }
  }
  if (sawReal) {
    return "REAL";
  }
  return sawInteger ? "INTEGER" : "TEXT";
}
1126
+
1127
+ // packages/text2sql/src/lib/adapters/spreadsheet/spreadsheet.ts
1128
/**
 * Sqlite adapter backed by a spreadsheet file. The file is parsed with
 * `parseFile`, each resulting sheet is created as a table and filled with its
 * rows, and queries are then executed against that database.
 */
var Spreadsheet = class extends Sqlite {
  /** The `DatabaseSync` handle that holds the loaded sheets. */
  #db;

  /**
   * @param options.file Path of the spreadsheet to load.
   * @param options.database Database location; defaults to ":memory:".
   * @param options.grounding Grounding list forwarded to the base adapter.
   * @throws Whatever `parseFile`, table creation, or data loading throws. If
   *   loading fails after the database was opened, the handle is closed
   *   before the error propagates so it does not leak.
   */
  constructor(options) {
    const sheets = parseFile(options.file);
    const dbPath = options.database ?? ":memory:";
    const db = new DatabaseSync(dbPath);
    try {
      for (const sheet of sheets) {
        const createSQL = createTableSQL(sheet);
        db.exec(createSQL);
        loadData(db, sheet);
      }
    } catch (error) {
      // No instance will exist for the caller to close(), so release the
      // handle here.
      db.close();
      throw error;
    }
    super({
      execute: (sql) => db.prepare(sql).all(),
      grounding: options.grounding
    });
    this.#db = db;
  }

  /**
   * Close the underlying SQLite database.
   * Call this when done to release resources.
   */
  close() {
    this.#db.close();
  }
};
1153
/**
 * Build the `CREATE TABLE` statement for a parsed sheet. Table and column
 * names are double-quoted after passing through `escapeIdentifier`; each
 * column is followed by its type.
 *
 * @param sheet Object with `name` and `columns` (`{ name, type }` entries).
 * @returns The SQL text of the statement.
 * @throws Error when the sheet has no columns.
 */
function createTableSQL(sheet) {
  const { name: tableName, columns } = sheet;
  if (columns.length === 0) {
    throw new Error(`Cannot create table "${tableName}" with no columns.`);
  }
  const definitions = [];
  for (const col of columns) {
    definitions.push(`"${escapeIdentifier(col.name)}" ${col.type}`);
  }
  return `CREATE TABLE "${escapeIdentifier(tableName)}" (${definitions.join(", ")})`;
}
1160
/**
 * Insert every row of `sheet` into its table using one prepared statement,
 * wrapped in a single transaction.
 *
 * Values are read from each row by the column's `originalKey` and passed
 * through `convertValue` with the column's type. Nothing is executed when the
 * sheet has no rows.
 *
 * @param db Database handle exposing `prepare(sql)` and `exec(sql)`.
 * @param sheet Object with `name`, `columns` and `rows`.
 * @throws The error raised while inserting or committing. A failure of the
 *   subsequent ROLLBACK is suppressed so it cannot hide that original error.
 */
function loadData(db, sheet) {
  if (sheet.rows.length === 0) {
    return;
  }
  const columns = sheet.columns.map((c) => `"${escapeIdentifier(c.name)}"`).join(", ");
  const placeholders = sheet.columns.map(() => "?").join(", ");
  const insertSQL = `INSERT INTO "${escapeIdentifier(sheet.name)}" (${columns}) VALUES (${placeholders})`;
  const stmt = db.prepare(insertSQL);
  db.exec("BEGIN TRANSACTION");
  try {
    for (const row of sheet.rows) {
      const values = sheet.columns.map((col) => convertValue(row[col.originalKey], col.type));
      stmt.run(...values);
    }
    db.exec("COMMIT");
  } catch (error) {
    try {
      db.exec("ROLLBACK");
    } catch {
      // The insert/commit failure is the actionable one; report that.
    }
    throw error;
  }
}
1183
/**
 * Convert a parsed cell value into the value bound to the INSERT statement.
 *
 * - Nullish values and empty strings become `null`.
 * - `Date` values become the `YYYY-MM-DD` part of their ISO (UTC) string,
 *   regardless of `type`; an invalid Date becomes `null` instead of making
 *   `toISOString()` throw and abort the whole load.
 * - "INTEGER": `Number(value)` rounded down, or `null` when not a number.
 * - "REAL": `Number(value)`, or `null` when not a number.
 * - "TEXT" and anything else: booleans become "true"/"false", objects are
 *   JSON-stringified, everything else goes through `String`.
 *
 * @param value Raw cell value.
 * @param type Column type the value is stored under.
 * @returns A number, a string, or `null`.
 */
function convertValue(value, type) {
  if (value == null || value === "") {
    return null;
  }
  if (value instanceof Date) {
    // toISOString() throws a RangeError on an invalid Date; store NULL
    // instead, the same way unparsable numbers are handled below.
    if (Number.isNaN(value.getTime())) {
      return null;
    }
    return value.toISOString().split("T")[0];
  }
  switch (type) {
    case "INTEGER": {
      const num = Number(value);
      return Number.isNaN(num) ? null : Math.floor(num);
    }
    case "REAL": {
      const num = Number(value);
      return Number.isNaN(num) ? null : num;
    }
    case "TEXT":
    default: {
      if (typeof value === "boolean") {
        return value ? "true" : "false";
      }
      if (typeof value === "object") {
        return JSON.stringify(value);
      }
      return String(value);
    }
  }
}
1217
/**
 * Double every double-quote character in `name` so it can be placed between
 * double quotes in SQL text. The surrounding quotes are not added here.
 *
 * @param name Identifier text.
 * @returns `name` with each `"` replaced by `""`.
 */
function escapeIdentifier(name) {
  const doubled = name.replace(/"/g, '""');
  return doubled;
}
1220
+ export {
1221
+ Spreadsheet,
1222
+ columnStats,
1223
+ columnValues,
1224
+ info,
1225
+ parseFile,
1226
+ rowCount,
1227
+ sanitizeIdentifier,
1228
+ sanitizeTableName,
1229
+ tables
1230
+ };
1231
+ //# sourceMappingURL=index.js.map