@rog0x/mcp-database-tools 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,469 @@
1
+ /**
2
+ * Query Builder — convert natural language descriptions into SQL queries.
3
+ * Uses pattern matching and heuristics to generate SQL from plain English.
4
+ */
5
+
6
/**
 * Everything `queryBuilder` hands back to its caller.
 */
interface QueryBuildResult {
  /** Generated SQL text, one clause per line. */
  sql: string;
  /** Dialect the query was generated for. */
  dialect: string;
  /** Human-readable summary of what the query does. */
  explanation: string;
  /** Guesses made while generating the query that the caller should verify. */
  assumptions: Array<string>;
  /** Potentially dangerous aspects of the generated query. */
  warnings: Array<string>;
}
13
+
14
/**
 * Structured form of a natural-language request, produced by the parser and
 * consumed by the SQL generator.
 */
interface ParsedIntent {
  /** Kind of statement to generate. */
  action: "select" | "insert" | "update" | "delete" | "count" | "aggregate";
  /** Target tables; the first entry is treated as the main table. */
  tables: Array<string>;
  /** Columns to select; an empty list is rendered as `*`. */
  columns: Array<string>;
  /** WHERE conditions, rendered in order. */
  conditions: Array<ConditionClause>;
  /** ORDER BY entries, rendered in order. */
  orderBy: Array<OrderClause>;
  /** GROUP BY column names. */
  groupBy: Array<string>;
  /** Row limit, or `null` when no limit was requested. */
  limit: number | null;
  /** Tables to join onto the main table. */
  joins: Array<JoinClause>;
  /** Aggregate expressions for the select list of an "aggregate" query. */
  aggregates: Array<AggregateClause>;
  /** Whether a plain SELECT should use DISTINCT. */
  distinct: boolean;
}
26
+
27
/**
 * One WHERE predicate, rendered as `column operator value`.
 */
interface ConditionClause {
  /** Column on the left-hand side of the comparison. */
  column: string;
  /** Comparison operator, e.g. `=`, `>`, `>=`. */
  operator: string;
  /**
   * Right-hand side, inserted into the SQL verbatim — it must already be a
   * valid SQL literal or expression (string literals include their quotes).
   */
  value: string;
  /** Keyword used to chain this predicate onto the previous one. */
  connector: "AND" | "OR";
}
33
+
34
/**
 * One ORDER BY entry, rendered as `column direction`.
 */
interface OrderClause {
  /** Column to sort on. */
  column: string;
  /** Sort direction. */
  direction: "ASC" | "DESC";
}
38
+
39
/**
 * One JOIN, rendered as `type JOIN table ON onLeft = onRight`.
 */
interface JoinClause {
  /** Table being joined in. */
  table: string;
  /** Left operand of the ON equality. */
  onLeft: string;
  /** Right operand of the ON equality. */
  onRight: string;
  /** Join flavour. */
  type: "INNER" | "LEFT" | "RIGHT";
}
45
+
46
/**
 * One aggregate expression, rendered as `func(column) AS alias`.
 */
interface AggregateClause {
  /** SQL aggregate function name, e.g. `AVG`, `SUM`. */
  func: string;
  /** Argument passed to the aggregate function. */
  column: string;
  /** Alias given to the result column. */
  alias: string;
}
51
+
52
/** SQL dialects the builder knows by name; "generic" is the fallback. */
type Dialect =
  | "mysql"
  | "postgresql"
  | "sqlite"
  | "generic";
53
+
54
/**
 * Parse a plain-English request into a structured intent.
 *
 * Matching is purely heuristic (regular expressions over the lower-cased
 * text); nothing is validated against a real schema.
 *
 * Notable behaviors:
 * - The first table pattern that yields a non-stop-word wins. When nothing
 *   matches, the placeholder table `"table_name"` is used; callers that need
 *   to know about the placeholder must compare against that literal.
 * - Date conditions are emitted as `DATE_TRUNC(...)` / `INTERVAL '...'`
 *   expressions regardless of the target dialect (the parser has no dialect).
 * - Values captured from "where X is Y" keep the casing the user typed.
 * - "at least N things" / "more than N things" only enable aggregate
 *   detection; the intent has no HAVING slot, so the threshold itself is not
 *   carried through.
 * - `columns` is always returned empty.
 *
 * @param text Free-form description of the desired query.
 * @returns The parsed intent.
 */
function parseNaturalLanguage(text: string): ParsedIntent {
  // Keep the caller's casing around for literal values; match on lower case.
  const original = text.trim();
  const lower = original.toLowerCase();

  // --- Action -------------------------------------------------------------
  let action: ParsedIntent["action"] = "select";
  if (/^(count|how many)/.test(lower)) action = "count";
  else if (/^(add|insert|create a|create new)/.test(lower)) action = "insert";
  else if (/^(update|change|modify|set)/.test(lower)) action = "update";
  else if (/^(delete|remove|drop all)/.test(lower)) action = "delete";
  else if (/\b(average|sum|total|minimum|maximum|avg|min|max)\b/.test(lower)) action = "aggregate";

  // --- Main table ---------------------------------------------------------
  // Ordered from most to least specific; the first usable capture wins.
  const tablePatterns = [
    /\bfrom\s+(?:the\s+)?(\w+)\s+table/i,
    /\bfrom\s+(\w+)/i,
    /\bin\s+(?:the\s+)?(\w+)\s+table/i,
    /\bin\s+(\w+)/i,
    /\b(?:all|get|find|show|list|fetch|retrieve)\s+(\w+)/i,
    /\b(?:update|modify|change)\s+(?:the\s+)?(\w+)/i,
    /\b(?:delete|remove)\s+(?:from\s+)?(?:the\s+)?(\w+)/i,
    /\b(?:insert|add)\s+(?:into\s+|to\s+)?(?:the\s+)?(\w+)/i,
    /\b(\w+)\s+who\b/i,
    /\b(\w+)\s+that\b/i,
    /\b(\w+)\s+where\b/i,
    /\b(\w+)\s+with\b/i,
  ];

  // Words that can follow a trigger word but are never table names.
  const stopWords = new Set([
    "get", "find", "show", "list", "fetch", "retrieve", "all", "the",
    "count", "how", "many", "number", "total", "average", "sum",
    "minimum", "maximum", "each", "every", "and", "have", "has",
    "are", "were", "been", "being", "their", "them", "more", "than",
    "least", "most", "between", "not", "top", "first", "last",
    "update", "delete", "insert", "add", "remove", "change", "modify",
    "set", "new", "old", "create",
  ]);

  let detectedTable: string | null = null;
  for (const pattern of tablePatterns) {
    const match = lower.match(pattern);
    if (!match) continue;
    const candidate = match[1];
    if (candidate === undefined || stopWords.has(candidate)) continue;
    detectedTable = candidate;
    break;
  }
  // Fall back to a placeholder so downstream code always has a main table.
  const mainTable = detectedTable !== null ? detectedTable : "table_name";
  const tables: string[] = [mainTable];

  // --- Joins ("who have ...", "with ...", "joined with ...") ---------------
  const joins: JoinClause[] = [];
  const joinPatterns = [
    /\bwho\s+(?:have|has)\s+(?:at\s+least\s+)?(?:\d+\s+)?(\w+)/i,
    /\bwith\s+(?:at\s+least\s+)?(?:\d+\s+)?(\w+)/i,
    /\bjoin(?:ed)?\s+(?:with\s+)?(?:the\s+)?(\w+)/i,
  ];

  for (const pattern of joinPatterns) {
    const match = lower.match(pattern);
    if (!match) continue;
    const joinTable = match[1];
    if (joinTable === undefined || stopWords.has(joinTable)) continue;
    if (tables.includes(joinTable)) continue;
    // Several patterns can capture the same word; join each table only once.
    if (joins.some((j) => j.table === joinTable)) continue;
    joins.push({
      table: joinTable,
      onLeft: `${mainTable}.id`,
      // Guess the foreign key as "<singular main table>_id".
      onRight: `${joinTable}.${mainTable.replace(/s$/, "")}_id`,
      type: "INNER",
    });
  }

  // --- Conditions ---------------------------------------------------------
  const conditions: ConditionClause[] = [];

  // "this month" / "current month"
  if (/\b(?:this month|current month)\b/.test(lower)) {
    // "registered" only picks registered_at when neither "signed up" nor
    // "created" is present; everything else falls back to created_at.
    const mentionsCreation = /\bsigned up\b/.test(lower) || /\bcreated\b/.test(lower);
    const monthColumn =
      !mentionsCreation && /\bregistered\b/.test(lower) ? "registered_at" : "created_at";
    conditions.push({
      column: monthColumn,
      operator: ">=",
      value: "DATE_TRUNC('month', CURRENT_DATE)",
      connector: "AND",
    });
  }

  // "this year" / "current year"
  if (/\b(?:this year|current year)\b/.test(lower)) {
    conditions.push({
      column: "created_at",
      operator: ">=",
      value: "DATE_TRUNC('year', CURRENT_DATE)",
      connector: "AND",
    });
  }

  // "last N days"
  const lastDaysMatch = lower.match(/\blast\s+(\d+)\s+days?\b/);
  if (lastDaysMatch) {
    conditions.push({
      column: "created_at",
      operator: ">=",
      value: `CURRENT_DATE - INTERVAL '${lastDaysMatch[1]} days'`,
      connector: "AND",
    });
  }

  // "older than N"
  const olderMatch = lower.match(/\bolder\s+than\s+(\d+)/);
  if (olderMatch && olderMatch[1] !== undefined) {
    conditions.push({
      column: "age",
      operator: ">",
      value: olderMatch[1],
      connector: "AND",
    });
  }

  // "where X is Y" / "where X = Y" — matched on the original text so the
  // literal keeps its casing; the capture is \w+ only, so it cannot contain
  // quotes or other SQL metacharacters.
  const whereMatch = original.match(/\bwhere\s+(\w+)\s+(?:is|=|equals?)\s+['"]?(\w+)['"]?/i);
  if (whereMatch && whereMatch[1] !== undefined) {
    conditions.push({
      column: whereMatch[1].toLowerCase(),
      operator: "=",
      value: `'${whereMatch[2]}'`,
      connector: "AND",
    });
  }

  // "active" / "inactive"
  const mentionsInactive = /\binactive\b/.test(lower);
  if (/\bactive\b/.test(lower) && !mentionsInactive) {
    conditions.push({ column: "active", operator: "=", value: "TRUE", connector: "AND" });
  }
  if (mentionsInactive) {
    conditions.push({ column: "active", operator: "=", value: "FALSE", connector: "AND" });
  }

  // "at least N things" / "more than N things" imply aggregation.
  const hasThreshold =
    /\bat\s+least\s+\d+\s+\w+/.test(lower) || /\bmore\s+than\s+\d+\s+\w+/.test(lower);

  // --- Aggregates ---------------------------------------------------------
  const aggregates: AggregateClause[] = [];
  if (action === "aggregate" || action === "count" || hasThreshold) {
    const aggregateSpecs = [
      { func: "AVG", prefix: "avg", detect: /\baverage\b|\bavg\b/, capture: /\b(?:average|avg)\s+(\w+)/ },
      { func: "SUM", prefix: "total", detect: /\btotal\b|\bsum\b/, capture: /\b(?:total|sum)\s+(?:of\s+)?(\w+)/ },
      { func: "MIN", prefix: "min", detect: /\bminimum\b|\bmin\b|\blowest\b/, capture: /\b(?:minimum|min|lowest)\s+(\w+)/ },
      { func: "MAX", prefix: "max", detect: /\bmaximum\b|\bmax\b|\bhighest\b/, capture: /\b(?:maximum|max|highest)\s+(\w+)/ },
    ];
    for (const spec of aggregateSpecs) {
      if (!spec.detect.test(lower)) continue;
      const captured = lower.match(spec.capture);
      const target = captured && captured[1] !== undefined ? captured[1] : null;
      aggregates.push({
        func: spec.func,
        column: target !== null ? target : "*",
        alias: `${spec.prefix}_${target !== null ? target : "value"}`,
      });
    }
  }

  // --- GROUP BY ("per X", "for each X", "by X", "grouped by X") ------------
  // A bare "by" also appears in "sorted by X" / "order by X"; drop those
  // phrases first so a sort column is not mistaken for a grouping column.
  const withoutSortPhrases = lower.replace(/\b(?:sorted|ordered|order)\s+by\s+\w+/g, " ");
  const groupBy: string[] = [];
  const perMatch = withoutSortPhrases.match(/\b(?:per|for each|by|grouped by|group by)\s+(\w+)/);
  if (perMatch && perMatch[1] !== undefined) {
    groupBy.push(perMatch[1]);
  }

  // --- DISTINCT -----------------------------------------------------------
  const distinct = /\bunique\b|\bdistinct\b/.test(lower);

  // --- ORDER BY -----------------------------------------------------------
  const orderBy: OrderClause[] = [];
  const orderMatch = lower.match(
    /\b(?:sorted|ordered|order)\s+by\s+(\w+)\s*(asc|desc|ascending|descending)?/
  );
  if (orderMatch && orderMatch[1] !== undefined) {
    const wantsDescending = orderMatch[2] !== undefined && /desc/.test(orderMatch[2]);
    orderBy.push({
      column: orderMatch[1],
      direction: wantsDescending ? "DESC" : "ASC",
    });
  }
  if (/\bnewest\s+first\b/.test(lower)) {
    orderBy.push({ column: "created_at", direction: "DESC" });
  }
  if (/\boldest\s+first\b/.test(lower)) {
    orderBy.push({ column: "created_at", direction: "ASC" });
  }

  // --- LIMIT ("top N", "first N", "limit N") --------------------------------
  let limit: number | null = null;
  const topMatch = lower.match(/\b(?:top|first|limit)\s+(\d+)/);
  if (topMatch && topMatch[1] !== undefined) limit = parseInt(topMatch[1], 10);

  return {
    action,
    tables,
    // Column extraction is not implemented; SELECT falls back to "*".
    columns: [],
    conditions,
    orderBy,
    groupBy,
    limit,
    joins,
    aggregates,
    distinct,
  };
}
311
+
312
/**
 * Render a parsed intent as SQL text.
 *
 * Clauses are emitted one per line in the order: statement head, JOINs,
 * WHERE, GROUP BY, ORDER BY, LIMIT. Identifiers and condition values are
 * inserted verbatim; nothing is quoted or escaped here.
 *
 * @param intent  Parsed request. The first entry of `intent.tables` is the
 *                main table; a placeholder is used when the list is empty.
 * @param dialect Target dialect. Only used to warn when PostgreSQL-style date
 *                expressions appear in conditions for another dialect.
 * @returns The SQL plus the assumptions made and warnings raised while
 *          rendering it.
 */
function buildSql(intent: ParsedIntent, dialect: Dialect): { sql: string; assumptions: string[]; warnings: string[] } {
  const assumptions: string[] = [];
  const warnings: string[] = [];
  const parts: string[] = [];

  // Never render "FROM undefined" when the caller supplied no table.
  const [firstTable] = intent.tables;
  const mainTable = firstTable !== undefined ? firstTable : "table_name";
  if (firstTable === undefined) {
    assumptions.push("No table supplied — used placeholder 'table_name'");
  }

  switch (intent.action) {
    case "select": {
      let selectCols = intent.columns.length > 0 ? intent.columns.join(", ") : "*";
      if (intent.distinct) selectCols = `DISTINCT ${selectCols}`;
      parts.push(`SELECT ${selectCols}`);
      parts.push(`FROM ${mainTable}`);
      break;
    }
    case "count": {
      if (intent.groupBy.length > 0) {
        parts.push(`SELECT ${intent.groupBy.join(", ")}, COUNT(*) AS count`);
      } else {
        parts.push("SELECT COUNT(*) AS total_count");
      }
      parts.push(`FROM ${mainTable}`);
      break;
    }
    case "aggregate": {
      const aggCols = intent.aggregates.map((a) => `${a.func}(${a.column}) AS ${a.alias}`);
      if (aggCols.length === 0) {
        // An empty select list would be invalid SQL; fall back to a row count.
        aggCols.push("COUNT(*) AS count");
        assumptions.push("No aggregate function detected — defaulted to COUNT(*)");
      }
      const selectParts = [...intent.groupBy, ...aggCols];
      parts.push(`SELECT ${selectParts.join(", ")}`);
      parts.push(`FROM ${mainTable}`);
      break;
    }
    case "insert": {
      parts.push(`INSERT INTO ${mainTable} (column1, column2)`);
      parts.push("VALUES ('value1', 'value2')");
      assumptions.push("Placeholder columns and values used — replace with actual data");
      break;
    }
    case "update": {
      parts.push(`UPDATE ${mainTable}`);
      parts.push("SET column1 = 'new_value'");
      assumptions.push("Placeholder SET clause — replace with actual columns and values");
      break;
    }
    case "delete": {
      parts.push(`DELETE FROM ${mainTable}`);
      break;
    }
  }

  // JOINs — the ON condition is a guess, so always surface it.
  for (const join of intent.joins) {
    parts.push(`${join.type} JOIN ${join.table}`);
    parts.push(`  ON ${join.onLeft} = ${join.onRight}`);
    assumptions.push(
      `Assumed join condition: ${join.onLeft} = ${join.onRight} — adjust if your schema differs`
    );
  }

  // WHERE
  if (intent.conditions.length > 0) {
    intent.conditions.forEach((c, i) => {
      const prefix = i === 0 ? "WHERE" : `  ${c.connector}`;
      parts.push(`${prefix} ${c.column} ${c.operator} ${c.value}`);
    });

    // DATE_TRUNC(...) and INTERVAL '...' are PostgreSQL spellings; flag them
    // instead of silently emitting SQL another engine may reject.
    const usesPostgresDates = intent.conditions.some((c) => /DATE_TRUNC\(|INTERVAL\s+'/.test(c.value));
    if (usesPostgresDates && dialect !== "postgresql") {
      warnings.push(
        `Date expressions use PostgreSQL syntax (DATE_TRUNC / INTERVAL) — rewrite them for ${dialect}`
      );
    }
  } else if (intent.action === "update" || intent.action === "delete") {
    warnings.push("No WHERE clause detected — this will affect ALL rows");
  }

  // GROUP BY
  if (intent.groupBy.length > 0) {
    parts.push(`GROUP BY ${intent.groupBy.join(", ")}`);
  } else if (intent.joins.length > 0 && intent.action === "select") {
    // A joined SELECT is collapsed to one row per main-table id.
    parts.push(`GROUP BY ${mainTable}.id`);
    assumptions.push(`Added GROUP BY ${mainTable}.id — adjust based on your primary key`);
  }

  // ORDER BY
  if (intent.orderBy.length > 0) {
    parts.push(
      `ORDER BY ${intent.orderBy.map((o) => `${o.column} ${o.direction}`).join(", ")}`
    );
  }

  // LIMIT — every supported dialect value gets the same `LIMIT n` form.
  if (intent.limit !== null) {
    parts.push(`LIMIT ${intent.limit}`);
  }

  return {
    sql: parts.join("\n"),
    assumptions,
    warnings,
  };
}
418
+
419
/**
 * Turn a plain-English description into a SQL query plus commentary.
 *
 * @param description Free-form description of the desired query.
 * @param dialect     Target dialect name. Anything other than "mysql",
 *                    "postgresql" or "sqlite" is treated as "generic".
 * @param schemaHint  Optional schema text. Only used when no table could be
 *                    detected in `description`: the first word that is
 *                    directly followed by "(" replaces the placeholder table.
 * @returns The generated SQL, the dialect actually used, a one-line
 *          explanation, and the assumptions and warnings collected on the way.
 */
export function queryBuilder(
  description: string,
  dialect: string = "postgresql",
  schemaHint: string = ""
): QueryBuildResult {
  // Resolve the dialect through a lookup rather than an unchecked cast.
  const namedDialects: Dialect[] = ["mysql", "postgresql", "sqlite"];
  const d: Dialect = namedDialects.find((known) => known === dialect) || "generic";

  const intent = parseNaturalLanguage(description);

  // Assumptions made before SQL rendering (table detection).
  const tableAssumptions: string[] = [];

  // The parser signals "no table found" by returning this placeholder name.
  if (intent.tables[0] === "table_name") {
    let replacement: string | null = null;
    if (schemaHint) {
      // Words directly followed by "(" — the first one is taken as the table.
      const tableMatch = schemaHint.match(/\b(\w+)\s*\(/g);
      if (tableMatch) {
        const hintTables = tableMatch.map((t) => t.replace(/\s*\(/, "").trim());
        const firstHint = hintTables[0];
        if (firstHint !== undefined) replacement = firstHint;
      }
    }
    if (replacement !== null) {
      intent.tables[0] = replacement;
      tableAssumptions.push(
        `Could not determine table name from the description — used '${replacement}' from the schema hint`
      );
    } else {
      tableAssumptions.push("Could not determine table name — used placeholder 'table_name'");
    }
  }

  const { sql, assumptions, warnings } = buildSql(intent, d);

  // Generate explanation
  const explanation = [
    `Generates a ${intent.action.toUpperCase()} query against the "${intent.tables[0]}" table.`,
    intent.joins.length > 0
      ? `Joins with ${intent.joins.map((j) => j.table).join(", ")}.`
      : "",
    intent.conditions.length > 0
      ? `Filters: ${intent.conditions.map((c) => `${c.column} ${c.operator} ${c.value}`).join(", ")}.`
      : "",
    intent.orderBy.length > 0
      ? `Sorted by ${intent.orderBy.map((o) => `${o.column} ${o.direction}`).join(", ")}.`
      : "",
    // Compare against null so an explicit limit of 0 is still described.
    intent.limit !== null ? `Limited to ${intent.limit} results.` : "",
  ]
    .filter(Boolean)
    .join(" ");

  return {
    sql,
    dialect: d,
    explanation,
    assumptions: [...tableAssumptions, ...assumptions],
    warnings,
  };
}