postgres-scout-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3
+ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
4
+ import { RateLimiter } from '../utils/rate-limiter.js';
5
+ import { tools, executeTool } from '../tools/index.js';
6
+ import { sanitizeErrorMessage } from '../utils/sanitize.js';
7
+ import { zodToJsonSchema } from '../utils/zod-to-json-schema.js';
8
/**
 * Create and configure the MCP server for postgres-scout.
 *
 * Registers two request handlers:
 *  - ListTools: advertises every registered tool with a JSON-schema input spec.
 *  - CallTool:  rate-limits, dispatches to the tool handler, and wraps the
 *               result (or a sanitized error) in MCP text content.
 *
 * @param {object} connection - database connection handle passed to tools
 * @param {object} logger - structured logger with debug/info/error methods
 * @param {object} config - server config (mode, rate-limit settings)
 * @returns {Server} configured (but not yet connected) MCP server
 */
export function createMCPServer(connection, logger, config) {
  const server = new Server(
    { name: 'postgres-scout-mcp', version: '0.1.0' },
    { capabilities: { tools: {} } },
  );

  const rateLimiter = new RateLimiter(
    config.rateLimitMaxRequests,
    config.rateLimitWindowMs,
    config.enableRateLimit,
  );

  // Wrap an arbitrary JSON-serializable payload in the MCP text-content shape.
  const asTextContent = (payload) => [
    { type: 'text', text: JSON.stringify(payload, null, 2) },
  ];

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    logger.debug('mcp', 'Listing available tools');
    const toolList = Object.entries(tools).map(([name, tool]) => ({
      name,
      description: getToolDescription(name, config.mode),
      inputSchema: zodToJsonSchema(tool.schema),
    }));
    return { tools: toolList };
  });

  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;
    logger.info('mcp', `Tool called: ${name}`, { args });
    try {
      // checkLimit() throws once the window budget is exhausted; the catch
      // below converts that into a normal tool-error response.
      rateLimiter.checkLimit();
      const result = await executeTool(name, connection, logger, args || {});
      return { content: asTextContent(result) };
    } catch (error) {
      const rawMessage = error instanceof Error ? error.message : String(error);
      logger.error('mcp', `Error executing tool ${name}`, { error: rawMessage });
      // The raw message is only logged; the client sees a sanitized version.
      return {
        content: asTextContent({ error: sanitizeErrorMessage(rawMessage), tool: name }),
        isError: true,
      };
    }
  });

  return server;
}
61
/**
 * Attach the MCP server to a stdio transport and start serving.
 *
 * @param {Server} server - server produced by createMCPServer
 * @param {object} logger - structured logger
 * @returns {Promise<void>} resolves once the transport is connected
 */
export async function startServer(server, logger) {
  const stdio = new StdioServerTransport();
  logger.info('server', 'Starting MCP server');
  await server.connect(stdio);
  logger.info('server', 'MCP server running on stdio');
}
67
// Mode-independent tool descriptions, keyed by tool name. Hoisted to module
// scope so the lookup table is built once rather than on every call.
const STATIC_TOOL_DESCRIPTIONS = Object.freeze({
  // Database operations
  listDatabases: 'List all databases the user has access to',
  getDatabaseStats: 'Get comprehensive database statistics including size, cache hit ratio, and connection info',
  // Schema operations
  listSchemas: 'List all schemas in the current database',
  listTables: 'List all tables in a schema with size and statistics',
  describeTable: 'Get comprehensive table information including columns, constraints, and indexes',
  // Data quality tools
  findDuplicates: 'Find duplicate rows based on column combinations',
  findMissingValues: 'Find NULL values or missing data in columns',
  findOrphans: 'Find orphaned records with invalid foreign key references',
  checkConstraintViolations: 'Check for rows that would violate a constraint before adding it',
  analyzeTypeConsistency: 'Analyze if text columns contain consistent data types',
  // Temporal tools
  findRecent: 'Find rows within a time window',
  analyzeTimeSeries: 'Advanced time-series analysis with window functions and anomaly detection',
  detectSeasonality: 'Detect seasonal patterns in time-series data',
  // Monitoring tools
  getCurrentActivity: 'Get current active queries and connections',
  analyzeLocks: 'Analyze current locks and blocking queries',
  getIndexUsage: 'Analyze index usage and identify unused indexes',
  // Relationship tools
  exploreRelationships: 'Follow foreign key relationships to explore related records',
  analyzeForeignKeys: 'Analyze foreign key health and performance',
  // Export tools
  exportTable: 'Export table data to various formats (CSV, JSON, SQL)',
  generateInsertStatements: 'Generate INSERT statements for data migration',
  // Maintenance & health tools
  analyzeTableBloat: 'Detect table and index bloat for VACUUM planning',
  suggestVacuum: 'Analyze and recommend VACUUM operations based on dead tuples and bloat',
  getHealthScore: 'Calculate overall database health score with component breakdown',
  getSlowQueries: 'Analyze slow queries from pg_stat_statements extension',
  // Optimization tools
  suggestIndexes: 'Analyze query patterns and table scans to recommend missing indexes',
  suggestPartitioning: 'Analyze large tables and recommend partitioning strategies',
  detectAnomalies: 'Detect anomalies in query performance, connections, and data patterns',
  // Mutation tools (safe write operations)
  previewUpdate: 'Preview which rows would be affected by an UPDATE without modifying data',
  previewDelete: 'Preview which rows would be deleted without actually deleting them',
  safeUpdate: 'Execute UPDATE with safety guards: dry-run mode, maxRows limit, empty WHERE protection',
  safeDelete: 'Execute DELETE with safety guards: dry-run mode, maxRows limit, empty WHERE protection',
  safeInsert: 'Execute INSERT with safety guards: dry-run mode, maxRows limit, parameterized values, row batching, ON CONFLICT support',
  // Live monitoring tools
  getLiveMetrics: 'Collect real-time database metrics over a time period with configurable intervals',
  getHottestTables: 'Identify tables with highest activity during a sample period',
  getTableMetrics: 'Get comprehensive metrics for a specific table including I/O, scans, and maintenance stats'
});

// Descriptions whose wording depends on the server's read-only/read-write mode.
const MODE_AWARE_DESCRIPTIONS = Object.freeze({
  executeQuery: (mode) => `Execute SELECT queries${mode === 'read-write' ? ' or write operations' : ' (read-only)'}`,
  explainQuery: (mode) => `Analyze query performance using EXPLAIN${mode === 'read-only' ? ' (ANALYZE disabled)' : ' ANALYZE'}`,
  optimizeQuery: (mode) => `Analyze a specific query and provide optimization recommendations${mode === 'read-only' ? ' (estimated plan only — ANALYZE unavailable)' : ''}`
});

/**
 * Resolve the human-readable description for a tool.
 *
 * @param {string} name - tool identifier
 * @param {string} mode - server mode ('read-only' | 'read-write')
 * @returns {string} the description, or a generic fallback for unknown tools
 */
function getToolDescription(name, mode) {
  const dynamic = MODE_AWARE_DESCRIPTIONS[name];
  if (dynamic) {
    return dynamic(mode);
  }
  return STATIC_TOOL_DESCRIPTIONS[name] ?? `Execute ${name} operation`;
}
122
+ //# sourceMappingURL=setup.js.map
@@ -0,0 +1,442 @@
1
+ import { z } from 'zod';
2
+ import { executeInternalQuery } from '../utils/database.js';
3
+ import { escapeIdentifier, sanitizeIdentifier, validateCondition } from '../utils/sanitize.js';
4
// Shared field shapes for the data-quality tool schemas. Zod schemas are
// immutable, so the same instance can safely be reused across objects.
const schemaField = z.string().optional().default('public');
const limitField = z.number().optional().default(100);
const includeRowsField = z.boolean().optional().default(true);

// Input schema: findDuplicates
const FindDuplicatesSchema = z.object({
  table: z.string(),
  columns: z.array(z.string()),
  schema: schemaField,
  limit: limitField,
  minCount: z.number().optional().default(2),
  includeRows: includeRowsField
});

// Input schema: findMissingValues
const FindMissingValuesSchema = z.object({
  table: z.string(),
  columns: z.array(z.string()),
  schema: schemaField,
  includeRows: includeRowsField,
  limit: limitField
});

// Input schema: findOrphans
const FindOrphansSchema = z.object({
  table: z.string(),
  foreignKey: z.string(),
  referenceTable: z.string(),
  referenceColumn: z.string(),
  schema: schemaField,
  referenceSchema: schemaField,
  limit: limitField
});

// Input schema: checkConstraintViolations
const CheckConstraintViolationsSchema = z.object({
  table: z.string(),
  condition: z.string().describe('SQL boolean expression to check, e.g., "email IS NOT NULL"'),
  constraintName: z.string().optional().describe('Name for the constraint'),
  schema: schemaField
});

// Input schema: analyzeTypeConsistency
const AnalyzeTypeConsistencySchema = z.object({
  table: z.string(),
  column: z.string(),
  schema: schemaField,
  suggestConversion: z.boolean().optional().default(true),
  sampleSize: z.number().optional().default(10000)
});
41
/**
 * Find duplicate rows in a table, grouped by a combination of columns.
 *
 * Runs a GROUP BY ... HAVING COUNT(*) >= minCount aggregation, optionally
 * fetches up to 10 sample rows per duplicate group, and returns statistics
 * plus remediation recommendations.
 *
 * Fixes over the previous revision:
 *  - removed the unused `groupByList` local (dead code);
 *  - per-group sample lookups use IS NOT DISTINCT FROM so groups whose key
 *    contains NULL (which GROUP BY collapses together) still match rows;
 *  - the loop-invariant sample query is built once, not once per group.
 *
 * @param {object} connection - database connection handle
 * @param {object} logger - structured logger
 * @param {object} args - parsed FindDuplicatesSchema input
 * @returns {Promise<object>} duplicate groups, statistics, recommendations
 */
export async function findDuplicates(connection, logger, args) {
  const { table, columns, schema, limit, minCount, includeRows } = args;
  logger.info('findDuplicates', 'Finding duplicate rows', { table, columns });
  const sanitizedSchema = sanitizeIdentifier(schema);
  const sanitizedTable = sanitizeIdentifier(table);
  const sanitizedColumns = columns.map(sanitizeIdentifier);
  const columnList = sanitizedColumns.map(escapeIdentifier).join(', ');
  const qualifiedTable = `${escapeIdentifier(sanitizedSchema)}.${escapeIdentifier(sanitizedTable)}`;
  const countQuery = `
    SELECT COUNT(*) as total_rows
    FROM ${qualifiedTable}
  `;
  const duplicatesQuery = `
    SELECT
      ${columnList},
      COUNT(*) as count
    FROM ${qualifiedTable}
    GROUP BY ${columnList}
    HAVING COUNT(*) >= $1
    ORDER BY COUNT(*) DESC
    LIMIT $2
  `;
  const [totalResult, duplicatesResult] = await Promise.all([
    executeInternalQuery(connection, logger, { query: countQuery }),
    executeInternalQuery(connection, logger, {
      query: duplicatesQuery,
      params: [minCount, limit]
    })
  ]);
  const totalRows = parseInt(totalResult.rows[0]?.total_rows || '0', 10);
  const duplicateGroups = duplicatesResult.rows;
  let duplicateGroupsWithRows = duplicateGroups;
  if (includeRows && duplicateGroups.length > 0) {
    // Process in batches of 5 to avoid exhausting the connection pool.
    const BATCH_SIZE = 5;
    const results = [];
    // IS NOT DISTINCT FROM matches GROUP BY semantics: NULL keys compare
    // equal, whereas `col = $n` would never match a NULL group value.
    const whereConditions = sanitizedColumns
      .map((col, idx) => `${escapeIdentifier(col)} IS NOT DISTINCT FROM $${idx + 1}`)
      .join(' AND ');
    const rowsQuery = `
      SELECT *
      FROM ${qualifiedTable}
      WHERE ${whereConditions}
      LIMIT 10
    `;
    for (let i = 0; i < duplicateGroups.length; i += BATCH_SIZE) {
      const batch = duplicateGroups.slice(i, i + BATCH_SIZE);
      const batchResults = await Promise.all(batch.map(async (group) => {
        const params = sanitizedColumns.map(col => group[col]);
        const rowsResult = await executeInternalQuery(connection, logger, {
          query: rowsQuery,
          params
        });
        return {
          ...group,
          rows: rowsResult.rows
        };
      }));
      results.push(...batchResults);
    }
    duplicateGroupsWithRows = results;
  }
  const totalDuplicateRows = duplicateGroups.reduce((sum, group) => sum + parseInt(group.count, 10), 0);
  const recommendations = [];
  if (duplicateGroups.length > 0) {
    recommendations.push(`Found ${totalDuplicateRows} duplicate rows across ${duplicateGroups.length} groups`);
    recommendations.push(`Consider adding UNIQUE constraint: ALTER TABLE ${schema}.${table} ADD CONSTRAINT ${table}_${columns.join('_')}_unique UNIQUE (${columns.join(', ')})`);
    recommendations.push('Review and delete duplicates, keeping the most recent or earliest record');
  }
  else {
    recommendations.push('✓ No duplicates found');
  }
  return {
    table,
    schema,
    columns,
    totalDuplicateGroups: duplicateGroups.length,
    affectedRows: totalDuplicateRows,
    statistics: {
      totalRows,
      uniqueRows: totalRows - totalDuplicateRows,
      duplicateRows: totalDuplicateRows,
      duplicatePercentage: totalRows > 0 ? ((totalDuplicateRows / totalRows) * 100).toFixed(2) : '0'
    },
    duplicateGroups: duplicateGroupsWithRows.slice(0, limit),
    recommendations
  };
}
129
/**
 * Report NULL counts (and optional sample rows) for a set of columns.
 *
 * Improvement over the previous revision: all per-column NULL counts and the
 * total row count are gathered in ONE table scan using FILTER aggregates,
 * instead of one scan for the total plus one scan per column (N+1 scans).
 * The returned shape is unchanged.
 *
 * @param {object} connection - database connection handle
 * @param {object} logger - structured logger
 * @param {object} args - parsed FindMissingValuesSchema input
 * @returns {Promise<object>} per-column NULL analysis and recommendations
 */
export async function findMissingValues(connection, logger, args) {
  const { table, columns, schema, includeRows, limit } = args;
  logger.info('findMissingValues', 'Finding NULL values', { table, columns });
  const sanitizedSchema = sanitizeIdentifier(schema);
  const sanitizedTable = sanitizeIdentifier(table);
  const sanitizedColumns = columns.map(sanitizeIdentifier);
  const qualifiedTable = `${escapeIdentifier(sanitizedSchema)}.${escapeIdentifier(sanitizedTable)}`;
  // One aggregate per column, all computed in a single pass over the table.
  const selectList = [
    'COUNT(*) as total_rows',
    ...sanitizedColumns.map((col, idx) => `COUNT(*) FILTER (WHERE ${escapeIdentifier(col)} IS NULL) as null_count_${idx}`)
  ].join(',\n      ');
  const countQuery = `
    SELECT
      ${selectList}
    FROM ${qualifiedTable}
  `;
  const countResult = await executeInternalQuery(connection, logger, { query: countQuery });
  const countRow = countResult.rows[0] ?? {};
  const totalRows = parseInt(countRow.total_rows || '0', 10);
  const analysis = {};
  for (let idx = 0; idx < sanitizedColumns.length; idx++) {
    const column = sanitizedColumns[idx];
    const nullCount = parseInt(countRow[`null_count_${idx}`] || '0', 10);
    const nullPercentage = totalRows > 0 ? ((nullCount / totalRows) * 100).toFixed(2) : '0';
    let recommendation = '';
    let sampleRows = [];
    if (nullCount === 0) {
      recommendation = '✓ No NULL values';
    }
    else {
      const percentage = parseFloat(nullPercentage);
      if (percentage < 1) {
        recommendation = `${nullCount} rows with NULL ${column} - minor issue`;
      }
      else if (percentage < 5) {
        recommendation = `⚠ ${percentage}% of rows missing ${column} - investigate`;
      }
      else {
        recommendation = `⚠ ${percentage}% of rows missing ${column} - set default or make required`;
      }
      // Only fetch example rows when requested and NULLs actually exist.
      if (includeRows) {
        const sampleQuery = `
          SELECT *
          FROM ${qualifiedTable}
          WHERE ${escapeIdentifier(column)} IS NULL
          LIMIT $1
        `;
        const sampleResult = await executeInternalQuery(connection, logger, {
          query: sampleQuery,
          params: [limit]
        });
        sampleRows = sampleResult.rows;
      }
    }
    analysis[column] = {
      nullCount,
      nullPercentage: parseFloat(nullPercentage),
      recommendation,
      // Attach sample rows only when some were fetched.
      ...(sampleRows.length > 0 && { sampleRows })
    };
  }
  const recommendations = [];
  for (const [column, data] of Object.entries(analysis)) {
    if (data.nullCount === 0) {
      recommendations.push(`Consider adding NOT NULL constraint to ${column}`);
    }
    else if (data.nullPercentage > 5) {
      recommendations.push(`High NULL rate in ${column} (${data.nullPercentage}%) - investigate data quality`);
    }
  }
  return {
    table,
    schema,
    totalRows,
    analysis,
    recommendations
  };
}
205
/**
 * Find rows whose foreign-key value has no matching row in the referenced
 * table (candidate orphans blocking a FK constraint).
 *
 * @param {object} connection - database connection handle
 * @param {object} logger - structured logger
 * @param {object} args - parsed FindOrphansSchema input
 * @returns {Promise<object>} orphan rows, counts, and cleanup recommendations
 */
export async function findOrphans(connection, logger, args) {
  const { table, foreignKey, referenceTable, referenceColumn, schema, referenceSchema, limit } = args;
  logger.info('findOrphans', 'Finding orphaned records', { table, foreignKey, referenceTable });
  const childTable = `${escapeIdentifier(sanitizeIdentifier(schema))}.${escapeIdentifier(sanitizeIdentifier(table))}`;
  const parentTable = `${escapeIdentifier(sanitizeIdentifier(referenceSchema))}.${escapeIdentifier(sanitizeIdentifier(referenceTable))}`;
  const fkExpr = `t.${escapeIdentifier(sanitizeIdentifier(foreignKey))}`;
  const refExpr = `r.${escapeIdentifier(sanitizeIdentifier(referenceColumn))}`;
  // Shared anti-join: child rows with a non-NULL FK and no matching parent.
  const orphanJoin = `
    FROM ${childTable} t
    LEFT JOIN ${parentTable} r
      ON ${fkExpr} = ${refExpr}
    WHERE ${fkExpr} IS NOT NULL
      AND ${refExpr} IS NULL
  `;
  const orphansQuery = `
    SELECT t.*
    ${orphanJoin}
    LIMIT $1
  `;
  const countQuery = `
    SELECT COUNT(*) as orphan_count
    ${orphanJoin}
  `;
  const totalQuery = `
    SELECT COUNT(*) as total_count
    FROM ${childTable}
  `;
  // All three queries are independent — run them concurrently.
  const [orphansResult, countResult, totalResult] = await Promise.all([
    executeInternalQuery(connection, logger, { query: orphansQuery, params: [limit] }),
    executeInternalQuery(connection, logger, { query: countQuery }),
    executeInternalQuery(connection, logger, { query: totalQuery })
  ]);
  const orphanCount = parseInt(countResult.rows[0]?.orphan_count || '0', 10);
  const totalCount = parseInt(totalResult.rows[0]?.total_count || '0', 10);
  const orphanPercentage = totalCount > 0 ? ((orphanCount / totalCount) * 100).toFixed(2) : '0';
  const recommendations = [];
  if (orphanCount > 0) {
    recommendations.push(`Found ${orphanCount} orphaned records (${orphanPercentage}% of total)`);
    recommendations.push(`Delete orphaned records: DELETE FROM ${schema}.${table} WHERE ${foreignKey} NOT IN (SELECT ${referenceColumn} FROM ${referenceSchema}.${referenceTable})`);
    recommendations.push(`Or set to NULL: UPDATE ${schema}.${table} SET ${foreignKey} = NULL WHERE ${foreignKey} NOT IN (SELECT ${referenceColumn} FROM ${referenceSchema}.${referenceTable})`);
    recommendations.push(`After cleanup, add FK constraint: ALTER TABLE ${schema}.${table} ADD CONSTRAINT ${table}_${foreignKey}_fkey FOREIGN KEY (${foreignKey}) REFERENCES ${referenceSchema}.${referenceTable}(${referenceColumn})`);
  }
  else {
    recommendations.push('✓ No orphaned records found');
    recommendations.push(`Safe to add FK constraint: ALTER TABLE ${schema}.${table} ADD CONSTRAINT ${table}_${foreignKey}_fkey FOREIGN KEY (${foreignKey}) REFERENCES ${referenceSchema}.${referenceTable}(${referenceColumn})`);
  }
  return {
    table,
    schema,
    foreignKey,
    referenceTable,
    referenceSchema,
    referenceColumn,
    orphanCount,
    totalCount,
    orphanPercentage: parseFloat(orphanPercentage),
    orphanedRows: orphansResult.rows,
    recommendations
  };
}
268
/**
 * Check whether existing rows would violate a proposed CHECK constraint.
 *
 * NOTE: `condition` is interpolated into the SQL text after
 * validateCondition(); it is never sent as a bind parameter, so that
 * validation is the only injection guard.
 *
 * @param {object} connection - database connection handle
 * @param {object} logger - structured logger
 * @param {object} args - parsed CheckConstraintViolationsSchema input
 * @returns {Promise<object>} violation count, sample rows, recommendations
 */
export async function checkConstraintViolations(connection, logger, args) {
  const { table, condition, constraintName, schema } = args;
  logger.info('checkConstraintViolations', 'Checking constraint violations', { table, condition });
  validateCondition(condition);
  const target = `${escapeIdentifier(sanitizeIdentifier(schema))}.${escapeIdentifier(sanitizeIdentifier(table))}`;
  const name = constraintName || `${table}_check`;
  const violationsQuery = `
    SELECT *
    FROM ${target}
    WHERE NOT (${condition})
    LIMIT 100
  `;
  const countQuery = `
    SELECT COUNT(*) as violation_count
    FROM ${target}
    WHERE NOT (${condition})
  `;
  const [violationsResult, countResult] = await Promise.all([
    executeInternalQuery(connection, logger, { query: violationsQuery }),
    executeInternalQuery(connection, logger, { query: countQuery })
  ]);
  const violationCount = parseInt(countResult.rows[0]?.violation_count || '0', 10);
  const recommendations = violationCount > 0
    ? [
        `⚠ ${violationCount} rows would violate CHECK constraint`,
        'Fix violations before adding constraint',
        `Example: UPDATE ${schema}.${table} SET ... WHERE NOT (${condition})`
      ]
    : [
        '✓ No violations found - safe to add constraint',
        `ALTER TABLE ${schema}.${table} ADD CONSTRAINT ${name} CHECK (${condition})`
      ];
  return {
    table,
    schema,
    constraint: name,
    condition,
    violationCount,
    violations: violationsResult.rows.slice(0, 20),
    recommendations
  };
}
311
/**
 * Analyze whether a text column's values consistently match a single type
 * (numeric, date-like, or boolean-like) and suggest a safe type migration.
 *
 * Fixes over the previous revision:
 *  - the information_schema lookup and the sample scan run in parallel;
 *  - percentage/ratio math is guarded against an empty sample (previously it
 *    produced "NaN" percentages and a misleading "mixed types" verdict);
 *  - missing result rows fall back to zeros instead of NaN.
 *
 * @param {object} connection - database connection handle
 * @param {object} logger - structured logger
 * @param {object} args - parsed AnalyzeTypeConsistencySchema input
 * @returns {Promise<object>} pattern breakdown, recommendations, and an
 *                            optional suggestedMigration plan
 */
export async function analyzeTypeConsistency(connection, logger, args) {
  const { table, column, schema, suggestConversion, sampleSize } = args;
  logger.info('analyzeTypeConsistency', 'Analyzing type consistency', { table, column });
  const sanitizedSchema = sanitizeIdentifier(schema);
  const sanitizedTable = sanitizeIdentifier(table);
  const sanitizedColumn = sanitizeIdentifier(column);
  const col = escapeIdentifier(sanitizedColumn);
  const typeQuery = `
    SELECT
      data_type as current_type
    FROM information_schema.columns
    WHERE table_schema = $1
      AND table_name = $2
      AND column_name = $3
  `;
  const analysisQuery = `
    SELECT
      COUNT(*) as total_rows,
      COUNT(*) FILTER (WHERE ${col} IS NULL) as null_count,
      COUNT(*) FILTER (WHERE ${col} ~ '^[0-9]+$') as integer_count,
      COUNT(*) FILTER (WHERE ${col} ~ '^[0-9]+\\.[0-9]+$') as decimal_count,
      COUNT(*) FILTER (WHERE ${col} ~ '^[0-9]{4}-[0-9]{2}-[0-9]{2}') as date_count,
      COUNT(*) FILTER (WHERE ${col} ~ '^(true|false|t|f|yes|no|y|n|1|0)$') as boolean_count
    FROM (
      SELECT ${col}
      FROM ${escapeIdentifier(sanitizedSchema)}.${escapeIdentifier(sanitizedTable)}
      LIMIT $1
    ) sample
  `;
  // The two queries are independent — run them concurrently.
  const [typeResult, analysisResult] = await Promise.all([
    executeInternalQuery(connection, logger, {
      query: typeQuery,
      params: [sanitizedSchema, sanitizedTable, sanitizedColumn]
    }),
    executeInternalQuery(connection, logger, {
      query: analysisQuery,
      params: [sampleSize]
    })
  ]);
  const currentType = typeResult.rows[0]?.current_type || 'unknown';
  const stats = analysisResult.rows[0] ?? {};
  const totalRows = parseInt(stats.total_rows ?? '0', 10);
  const nullCount = parseInt(stats.null_count ?? '0', 10);
  const numericCount = parseInt(stats.integer_count ?? '0', 10) + parseInt(stats.decimal_count ?? '0', 10);
  const dateCount = parseInt(stats.date_count ?? '0', 10);
  const booleanCount = parseInt(stats.boolean_count ?? '0', 10);
  const invalidCount = totalRows - nullCount - numericCount - dateCount - booleanCount;
  // Guarded helpers: an empty sample yields 0 instead of NaN.
  const pct = (count) => (totalRows > 0 ? ((count / totalRows) * 100).toFixed(1) : '0.0');
  const ratio = (count) => (totalRows > 0 ? count / totalRows : 0);
  const patterns = {
    numeric: { count: numericCount, percentage: pct(numericCount) },
    date: { count: dateCount, percentage: pct(dateCount) },
    boolean: { count: booleanCount, percentage: pct(booleanCount) },
    null: { count: nullCount, percentage: pct(nullCount) },
    invalid: { count: invalidCount, percentage: pct(invalidCount) }
  };
  const recommendations = [];
  let suggestedMigration = null;
  if (totalRows === 0) {
    recommendations.push('⚠ No rows sampled - nothing to analyze');
  }
  else if (ratio(numericCount) > 0.95) {
    recommendations.push(`✓ ${patterns.numeric.percentage}% of values are numeric`);
    if (invalidCount > 0) {
      recommendations.push(`${invalidCount} rows contain non-numeric values - clean up first`);
    }
    if (suggestConversion) {
      // Any decimal-looking value forces a fractional target type.
      const targetType = parseInt(stats.decimal_count ?? '0', 10) > 0 ? 'numeric(10,2)' : 'integer';
      recommendations.push(`Consider converting to ${targetType}: ALTER TABLE ${schema}.${table} ALTER COLUMN ${column} TYPE ${targetType} USING ${column}::${targetType}`);
      suggestedMigration = {
        targetType,
        needsCleanup: invalidCount > 0,
        cleanupQuery: `UPDATE ${schema}.${table} SET ${column} = NULL WHERE ${column} !~ '^[0-9.]+$'`,
        conversionQuery: `ALTER TABLE ${schema}.${table} ALTER COLUMN ${column} TYPE ${targetType} USING ${column}::${targetType}`
      };
    }
  }
  else if (ratio(dateCount) > 0.95) {
    recommendations.push(`✓ ${patterns.date.percentage}% of values are date-like`);
    recommendations.push(`Consider converting to DATE or TIMESTAMP`);
  }
  else if (ratio(booleanCount) > 0.95) {
    recommendations.push(`✓ ${patterns.boolean.percentage}% of values are boolean-like`);
    recommendations.push(`Consider converting to BOOLEAN`);
  }
  else {
    recommendations.push('⚠ Data has mixed types - not suitable for type conversion');
  }
  return {
    table,
    schema,
    column,
    currentType,
    analysis: {
      totalRows,
      sampleSize,
      patterns
    },
    recommendations,
    ...(suggestedMigration && { suggestedMigration })
  };
}
420
// Build a tool-registry entry in the shared { schema, handler } shape.
const makeTool = (schema, handler) => ({ schema, handler });

// Registry of data-quality tools: maps tool name -> { schema, handler }.
// Key order is preserved because it drives tool-listing order upstream.
export const dataQualityTools = {
  findDuplicates: makeTool(FindDuplicatesSchema, findDuplicates),
  findMissingValues: makeTool(FindMissingValuesSchema, findMissingValues),
  findOrphans: makeTool(FindOrphansSchema, findOrphans),
  checkConstraintViolations: makeTool(CheckConstraintViolationsSchema, checkConstraintViolations),
  analyzeTypeConsistency: makeTool(AnalyzeTypeConsistencySchema, analyzeTypeConsistency)
};
442
+ //# sourceMappingURL=data-quality.js.map