postgres-scout-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +234 -0
- package/bin/cli.js +67 -0
- package/dist/config/environment.js +52 -0
- package/dist/index.js +59 -0
- package/dist/server/setup.js +122 -0
- package/dist/tools/data-quality.js +442 -0
- package/dist/tools/database.js +148 -0
- package/dist/tools/export.js +223 -0
- package/dist/tools/index.js +52 -0
- package/dist/tools/live-monitoring.js +369 -0
- package/dist/tools/maintenance.js +617 -0
- package/dist/tools/monitoring.js +286 -0
- package/dist/tools/mutations.js +410 -0
- package/dist/tools/optimization.js +1094 -0
- package/dist/tools/query.js +138 -0
- package/dist/tools/relationships.js +261 -0
- package/dist/tools/schema.js +253 -0
- package/dist/tools/temporal.js +313 -0
- package/dist/types.js +2 -0
- package/dist/utils/database.js +123 -0
- package/dist/utils/logger.js +73 -0
- package/dist/utils/query-builder.js +180 -0
- package/dist/utils/rate-limiter.js +39 -0
- package/dist/utils/result-formatter.js +42 -0
- package/dist/utils/sanitize.js +525 -0
- package/dist/utils/zod-to-json-schema.js +85 -0
- package/package.json +58 -0
|
@@ -0,0 +1,1094 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { executeInternalQuery } from '../utils/database.js';
|
|
3
|
+
import { sanitizeIdentifier, assertNoSensitiveCatalogAccess } from '../utils/sanitize.js';
|
|
4
|
+
/**
 * Input schema for the index-suggestion tool.
 * Every field is optional; omitted fields fall back to the defaults below.
 */
const SuggestIndexesSchema = z.object({
    // Schema to inspect.
    schema: z.string().optional().default('public'),
    // Restrict the analysis to one table; all tables when omitted.
    table: z.string().optional(),
    // Minimum number of sequential scans before a table is considered.
    minSeqScans: z.number().optional().default(100),
    // Minimum average rows read per sequential scan.
    minRowsPerScan: z.number().optional().default(1000),
    includePartialIndexes: z.boolean().optional().default(false),
    includeCoveringIndexes: z.boolean().optional().default(false),
    // When true, pg_stat_statements is consulted for WHERE-clause patterns.
    analyzeQueries: z.boolean().optional().default(true),
});

/**
 * Input schema for the partitioning-suggestion tool. `table` is required.
 */
const SuggestPartitioningSchema = z.object({
    schema: z.string().optional().default('public'),
    table: z.string(),
    // Tables with fewer live rows than this are reported as "not_recommended".
    minRowsThreshold: z.number().optional().default(1000000),
    analyzeQueryPatterns: z.boolean().optional().default(true),
    // Size string such as "1GB" or "512MB".
    targetPartitionSize: z.string().optional().default('1GB'),
});

/**
 * Input schema for the anomaly-detection tool.
 */
const DetectAnomaliesSchema = z.object({
    type: z.enum(['query_performance', 'data_volume', 'connections', 'errors', 'all']).optional().default('all'),
    schema: z.string().optional().default('public'),
    table: z.string().optional(),
    // Look-back window: a number followed by h, d, w or m (e.g. "24h").
    timeWindow: z.string().optional().default('24h'),
    sensitivityLevel: z.enum(['low', 'medium', 'high']).optional().default('medium'),
    zScoreThreshold: z.number().optional().default(2),
});

/**
 * Input schema for the query-optimization tool. `query` is required.
 */
const OptimizeQuerySchema = z.object({
    query: z.string(),
    includeRewrite: z.boolean().optional().default(true),
    includeIndexes: z.boolean().optional().default(true),
    targetTimeMs: z.number().optional(),
});
|
|
34
|
+
/**
 * Analyze a schema (optionally a single table) for indexing opportunities.
 *
 * Runs four catalog/statistics queries in parallel (sequential-scan hot spots,
 * foreign keys without a supporting index, existing index definitions, index
 * usage counters) and, when `analyzeQueries` is set, a best-effort lookup in
 * pg_stat_statements.
 *
 * @param connection Database connection handle passed through to executeInternalQuery.
 * @param logger Logger exposing `info(tool, message, meta)`.
 * @param args Tool arguments: `schema`, optional `table`, `minSeqScans`,
 *   `minRowsPerScan`, `analyzeQueries`.
 * @returns {Promise<object>} `{ schema, table, suggestions, existingIndexAnalysis,
 *   queryInsights, summary }`; `queryInsights` is `undefined` when nothing was found.
 */
export async function suggestIndexes(connection, logger, args) {
    const { schema, table, minSeqScans, minRowsPerScan, analyzeQueries } = args;
    logger.info('suggestIndexes', 'Analyzing index opportunities', { schema, table });
    const sanitizedSchema = sanitizeIdentifier(schema);
    const hasTable = Boolean(table);
    // $1 is always the schema; $2 is the table when one was requested.
    const params = hasTable ? [sanitizedSchema, sanitizeIdentifier(table)] : [sanitizedSchema];
    // Per-query table filter; the column holding the table name differs between views.
    const tableClause = (column) => (hasTable ? `AND ${column} = $2` : '');
    // Quote an identifier for generated DDL only when it is not a plain lower-case name,
    // so output for ordinary names stays exactly as before.
    const quoteIfNeeded = (name) => {
        const text = String(name);
        return /^[a-z_][a-z0-9_]*$/.test(text) ? text : `"${text.replace(/"/g, '""')}"`;
    };
    // Thresholds are bound as parameters instead of being spliced into the SQL text.
    const minSeqScansParam = `$${params.length + 1}`;
    const minRowsPerScanParam = `$${params.length + 2}`;
    // Find tables with high sequential scan activity.
    // Sizes are looked up by relid so names that need quoting do not break the query.
    const seqScanQuery = `
        SELECT
            schemaname,
            relname as table_name,
            seq_scan,
            seq_tup_read,
            idx_scan,
            idx_tup_fetch,
            n_live_tup as row_count,
            CASE
                WHEN seq_scan = 0 THEN 0
                ELSE ROUND(seq_tup_read::numeric / seq_scan, 0)
            END as avg_rows_per_scan,
            pg_size_pretty(pg_total_relation_size(relid::regclass)) as table_size
        FROM pg_stat_user_tables
        WHERE schemaname = $1
            ${tableClause('relname')}
            AND seq_scan >= ${minSeqScansParam}::numeric
            AND seq_tup_read / NULLIF(seq_scan, 0) >= ${minRowsPerScanParam}::numeric
        ORDER BY seq_tup_read DESC
        LIMIT 50
    `;
    // Find foreign keys without indexes. A composite index whose leading column is the
    // FK column also serves FK lookups, so it counts as a supporting index.
    const fkWithoutIndexQuery = `
        SELECT
            tc.table_schema,
            tc.table_name,
            kcu.column_name,
            ccu.table_name as referenced_table,
            ccu.column_name as referenced_column
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
            ON tc.constraint_name = kcu.constraint_name
            AND tc.table_schema = kcu.table_schema
        JOIN information_schema.constraint_column_usage ccu
            ON tc.constraint_name = ccu.constraint_name
            AND tc.table_schema = ccu.table_schema
        WHERE tc.constraint_type = 'FOREIGN KEY'
            AND tc.table_schema = $1
            ${tableClause('tc.table_name')}
            AND NOT EXISTS (
                SELECT 1
                FROM pg_indexes pi
                WHERE pi.schemaname = tc.table_schema
                    AND pi.tablename = tc.table_name
                    AND (
                        pi.indexdef LIKE '%(' || kcu.column_name || ')%'
                        OR pi.indexdef LIKE '%(' || kcu.column_name || ',%'
                    )
            )
    `;
    // Find columns frequently used in WHERE clauses (from pg_stat_statements if available).
    // The schema/table names are bound as parameters rather than embedded in the literal.
    const queryPatternQuery = analyzeQueries ? `
        SELECT
            queryid,
            query,
            calls,
            mean_exec_time,
            total_exec_time
        FROM pg_stat_statements
        WHERE query ILIKE '%WHERE%'
            AND query ILIKE '%' || $1 || '%'
            ${hasTable ? `AND query ILIKE '%' || $2 || '%'` : ''}
            AND calls > 10
        ORDER BY total_exec_time DESC
        LIMIT 20
    ` : null;
    // Get existing indexes for analysis. format('%I.%I') quotes the name parts
    // correctly before the regclass lookup.
    const existingIndexesQuery = `
        SELECT
            schemaname,
            tablename,
            indexname,
            indexdef,
            pg_relation_size(format('%I.%I', schemaname, indexname)::regclass) as size_bytes
        FROM pg_indexes
        WHERE schemaname = $1
            ${tableClause('tablename')}
        ORDER BY size_bytes DESC
    `;
    // Get index usage stats. pg_index tells us whether an index enforces a constraint:
    // such indexes do real work on writes even when idx_scan is 0.
    const indexUsageQuery = `
        SELECT
            s.schemaname,
            s.relname as table_name,
            s.indexrelname as index_name,
            s.idx_scan,
            s.idx_tup_read,
            s.idx_tup_fetch,
            pg_size_pretty(pg_relation_size(s.indexrelid)) as index_size,
            (i.indisunique OR i.indisprimary OR i.indisexclusion) as enforces_constraint
        FROM pg_stat_user_indexes s
        JOIN pg_index i ON i.indexrelid = s.indexrelid
        WHERE s.schemaname = $1
            ${tableClause('s.relname')}
        ORDER BY s.idx_scan ASC
    `;
    const [seqScanResult, fkResult, existingResult, usageResult] = await Promise.all([
        executeInternalQuery(connection, logger, {
            query: seqScanQuery,
            params: [...params, minSeqScans, minRowsPerScan]
        }),
        executeInternalQuery(connection, logger, { query: fkWithoutIndexQuery, params }),
        executeInternalQuery(connection, logger, { query: existingIndexesQuery, params }),
        executeInternalQuery(connection, logger, { query: indexUsageQuery, params })
    ]);
    let queryPatternResult = { rows: [] };
    if (queryPatternQuery) {
        try {
            queryPatternResult = await executeInternalQuery(connection, logger, { query: queryPatternQuery, params });
        }
        catch (err) {
            // Best effort: pg_stat_statements might not be available. Record why we skipped.
            logger.info('suggestIndexes', 'Skipping query pattern analysis', {
                error: err instanceof Error ? err.message : String(err)
            });
        }
    }
    const suggestions = [];
    // Analyze sequential scans and suggest indexes
    for (const row of seqScanResult.rows) {
        const avgRowsPerScan = parseInt(row.avg_rows_per_scan || '0', 10);
        const seqScans = parseInt(row.seq_scan || '0', 10);
        const idxScans = parseInt(row.idx_scan || '0', 10);
        // Only suggest if sequential scans significantly outnumber index scans
        if (seqScans > idxScans * 2 && avgRowsPerScan > minRowsPerScan) {
            const impact = avgRowsPerScan > 10000 ? 'critical' : avgRowsPerScan > 5000 ? 'high' : 'medium';
            suggestions.push({
                table: row.table_name,
                type: 'sequential_scan',
                impact,
                reason: `High sequential scan activity: ${seqScans.toLocaleString()} scans reading ${avgRowsPerScan.toLocaleString()} avg rows`,
                metrics: {
                    seqScans,
                    avgRowsPerScan,
                    idxScans,
                    tableSize: row.table_size,
                    rowCount: parseInt(row.row_count || '0', 10)
                },
                recommendation: 'Analyze common WHERE clause columns for this table and add appropriate indexes',
                notes: [
                    'Run EXPLAIN ANALYZE on slow queries to identify filter columns',
                    'Consider composite indexes for multi-column filters'
                ]
            });
        }
    }
    // Add FK without index suggestions
    for (const row of fkResult.rows) {
        const indexName = quoteIfNeeded(`idx_${row.table_name}_${row.column_name}`);
        const qualifiedTable = `${quoteIfNeeded(schema)}.${quoteIfNeeded(row.table_name)}`;
        suggestions.push({
            table: row.table_name,
            columns: [row.column_name],
            type: 'foreign_key',
            impact: 'critical',
            reason: `Foreign key to ${row.referenced_table}(${row.referenced_column}) without index`,
            estimatedSpeedup: '10-100x for JOINs and cascading operations',
            createStatement: `CREATE INDEX CONCURRENTLY ${indexName} ON ${qualifiedTable} (${quoteIfNeeded(row.column_name)});`,
            notes: [
                'Missing FK indexes cause slow JOINs',
                'Cascading DELETEs/UPDATEs will be very slow',
                'Use CONCURRENTLY to avoid blocking writes'
            ]
        });
    }
    // Analyze existing indexes for issues
    const existingAnalysis = {
        totalIndexes: existingResult.rows.length,
        unusedIndexes: 0,
        duplicateIndexes: 0,
        recommendations: []
    };
    const indexDefs = new Map();
    for (const row of existingResult.rows) {
        // Track for duplicate detection: strip the "CREATE ... INDEX <name> ON " prefix so
        // two indexes with different names but the same definition share a key. The match
        // is anchored and non-greedy so a later " ON " in the definition is left intact.
        const key = `${row.tablename}:${row.indexdef.replace(/^CREATE.*?\sON\s/, '').trim()}`;
        if (!indexDefs.has(key)) {
            indexDefs.set(key, []);
        }
        indexDefs.get(key).push(row.indexname);
    }
    // Find duplicates
    for (const [, indexes] of indexDefs) {
        if (indexes.length > 1) {
            existingAnalysis.duplicateIndexes++;
            existingAnalysis.recommendations.push(`Duplicate indexes detected: ${indexes.join(', ')} - consider dropping all but one`);
        }
    }
    // Find unused indexes
    for (const row of usageResult.rows) {
        const scans = parseInt(row.idx_scan || '0', 10);
        const looksLikeConstraint = row.index_name.includes('pkey') || row.index_name.includes('_unique');
        if (scans === 0 && !row.enforces_constraint && !looksLikeConstraint) {
            existingAnalysis.unusedIndexes++;
            existingAnalysis.recommendations.push(`DROP INDEX CONCURRENTLY ${quoteIfNeeded(schema)}.${quoteIfNeeded(row.index_name)}; -- never used, ${row.index_size}`);
        }
    }
    // Parse query patterns if available
    const queryInsights = [];
    for (const row of queryPatternResult.rows) {
        // Extract the WHERE clause text (up to the first semicolon) as a hint for the reader.
        const whereMatch = typeof row.query === 'string' ? row.query.match(/WHERE\s+([^;]+)/i) : null;
        if (whereMatch) {
            queryInsights.push({
                queryFragment: whereMatch[0].substring(0, 200),
                calls: parseInt(row.calls || '0', 10),
                meanTime: parseFloat(row.mean_exec_time || '0').toFixed(2) + 'ms',
                hint: 'Analyze columns in this WHERE clause for indexing'
            });
        }
    }
    return {
        schema,
        table: table || 'all tables',
        suggestions,
        existingIndexAnalysis: existingAnalysis,
        queryInsights: queryInsights.length > 0 ? queryInsights : undefined,
        summary: {
            suggestionsCount: suggestions.length,
            criticalCount: suggestions.filter(s => s.impact === 'critical').length,
            highCount: suggestions.filter(s => s.impact === 'high').length,
            mediumCount: suggestions.filter(s => s.impact === 'medium').length
        }
    };
}
|
|
256
|
+
/**
 * Evaluate whether a table is a partitioning candidate and, if so, which column
 * and strategy look most promising.
 *
 * Returns early with `{ error }` when the table has no row in pg_stat_user_tables,
 * with `status: 'already_partitioned'` when pg_class reports relkind 'p', and with
 * `status: 'not_recommended'` when the live-row estimate is below `minRowsThreshold`.
 * Otherwise columns are scored (temporal -> range: 90, low-cardinality text -> list: 70,
 * integer "id" columns -> hash: 60) and the best one is turned into a recommendation.
 *
 * @param connection Database connection handle passed through to executeInternalQuery.
 * @param logger Logger exposing `info(tool, message, meta)`.
 * @param args Tool arguments: `schema`, `table`, `minRowsThreshold`, `targetPartitionSize`.
 * @returns {Promise<object>} Analysis result as described above.
 */
export async function suggestPartitioning(connection, logger, args) {
    const { schema, table, minRowsThreshold, targetPartitionSize } = args;
    logger.info('suggestPartitioning', 'Analyzing partitioning opportunities', { schema, table });
    const sanitizedSchema = sanitizeIdentifier(schema);
    const sanitizedTable = sanitizeIdentifier(table);
    const lookupParams = [sanitizedSchema, sanitizedTable];
    // Identifiers spliced into dynamic SQL are always double-quoted. By the time that SQL
    // runs the names have matched catalog rows exactly, so quoting preserves them verbatim.
    const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
    const qualifiedTable = `${quoteIdent(sanitizedSchema)}.${quoteIdent(sanitizedTable)}`;
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    // Get table stats. Sizes are resolved through relid rather than a concatenated name,
    // which would fail for names that need quoting.
    const tableStatsQuery = `
        SELECT
            n_live_tup as row_count,
            pg_total_relation_size(relid::regclass) as total_bytes,
            pg_size_pretty(pg_total_relation_size(relid::regclass)) as total_size,
            pg_size_pretty(pg_relation_size(relid::regclass)) as table_size,
            pg_size_pretty(pg_indexes_size(relid::regclass)) as index_size
        FROM pg_stat_user_tables
        WHERE schemaname = $1 AND relname = $2
    `;
    // Get columns with data types suitable for partitioning
    const columnsQuery = `
        SELECT
            column_name,
            data_type,
            is_nullable
        FROM information_schema.columns
        WHERE table_schema = $1 AND table_name = $2
        ORDER BY ordinal_position
    `;
    // Check if table is already partitioned
    const partitionCheckQuery = `
        SELECT
            relkind,
            CASE relkind
                WHEN 'p' THEN 'partitioned'
                WHEN 'r' THEN 'regular'
                ELSE 'other'
            END as table_type
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE n.nspname = $1 AND c.relname = $2
    `;
    const [statsResult, columnsResult, partitionResult] = await Promise.all([
        executeInternalQuery(connection, logger, { query: tableStatsQuery, params: lookupParams }),
        executeInternalQuery(connection, logger, { query: columnsQuery, params: lookupParams }),
        executeInternalQuery(connection, logger, { query: partitionCheckQuery, params: lookupParams })
    ]);
    if (statsResult.rows.length === 0) {
        return {
            error: `Table ${schema}.${table} not found`
        };
    }
    const stats = statsResult.rows[0];
    const rowCount = parseInt(stats.row_count || '0', 10);
    const totalBytes = parseInt(stats.total_bytes || '0', 10);
    // Check if already partitioned
    if (partitionResult.rows[0]?.table_type === 'partitioned') {
        return {
            table: `${schema}.${table}`,
            status: 'already_partitioned',
            message: 'This table is already partitioned',
            currentSize: stats.total_size,
            rowCount
        };
    }
    // Check if table meets threshold
    if (rowCount < minRowsThreshold) {
        return {
            table: `${schema}.${table}`,
            status: 'not_recommended',
            message: `Table has ${rowCount.toLocaleString()} rows, below threshold of ${minRowsThreshold.toLocaleString()}`,
            currentSize: stats.total_size,
            rowCount,
            recommendation: 'Partitioning adds complexity without significant benefit for smaller tables'
        };
    }
    // Analyze columns for partitioning candidates
    const columns = columnsResult.rows;
    const candidates = [];
    // information_schema reports timestamps by their SQL-standard names
    // ("timestamp without time zone" / "timestamp with time zone"); the short
    // aliases are kept for backward compatibility.
    const temporalTypes = new Set([
        'timestamp',
        'timestamptz',
        'date',
        'timestamp without time zone',
        'timestamp with time zone'
    ]);
    // Find timestamp/date columns (best for range partitioning).
    // The per-column scans run one at a time on purpose: each one reads the whole table.
    for (const col of columns.filter((c) => temporalTypes.has(c.data_type))) {
        const columnRef = quoteIdent(col.column_name);
        // Get value distribution
        const distQuery = `
            SELECT
                MIN(${columnRef}) as min_val,
                MAX(${columnRef}) as max_val,
                COUNT(DISTINCT DATE_TRUNC('month', ${columnRef})) as distinct_months
            FROM ${qualifiedTable}
            WHERE ${columnRef} IS NOT NULL
        `;
        try {
            const distResult = await executeInternalQuery(connection, logger, { query: distQuery, params: [] });
            if (distResult.rows.length > 0) {
                const dist = distResult.rows[0];
                candidates.push({
                    column: col.column_name,
                    dataType: col.data_type,
                    strategy: 'range',
                    minValue: dist.min_val,
                    maxValue: dist.max_val,
                    distinctMonths: parseInt(dist.distinct_months || '0', 10),
                    score: 90 // Temporal columns are usually best
                });
            }
        }
        catch (err) {
            // Best effort: skip this column, but leave a trace of why.
            logger.info('suggestPartitioning', 'Skipping temporal column analysis', {
                column: col.column_name,
                error: describeError(err)
            });
        }
    }
    // Find integer columns that could be used for hash partitioning
    const intColumns = columns.filter((c) => ['integer', 'bigint', 'smallint'].includes(c.data_type));
    for (const col of intColumns) {
        if (col.column_name.includes('id')) {
            candidates.push({
                column: col.column_name,
                dataType: col.data_type,
                strategy: 'hash',
                score: 60
            });
        }
    }
    // Find low-cardinality columns for list partitioning
    for (const col of columns) {
        if (!['character varying', 'text', 'varchar'].includes(col.data_type)) {
            continue;
        }
        const cardinalityQuery = `
            SELECT COUNT(DISTINCT ${quoteIdent(col.column_name)}) as cardinality
            FROM ${qualifiedTable}
        `;
        try {
            const cardResult = await executeInternalQuery(connection, logger, { query: cardinalityQuery, params: [] });
            const cardinality = parseInt(cardResult.rows[0]?.cardinality || '0', 10);
            if (cardinality > 0 && cardinality <= 20) {
                candidates.push({
                    column: col.column_name,
                    dataType: col.data_type,
                    strategy: 'list',
                    cardinality,
                    score: 70
                });
            }
        }
        catch (err) {
            // Best effort: skip this column, but leave a trace of why.
            logger.info('suggestPartitioning', 'Skipping cardinality analysis', {
                column: col.column_name,
                error: describeError(err)
            });
        }
    }
    // Sort by score
    candidates.sort((a, b) => b.score - a.score);
    // Generate recommendation
    const recommendation = candidates.length > 0 ? generatePartitionRecommendation(schema, table, candidates[0], rowCount, totalBytes, targetPartitionSize) : null;
    return {
        table: `${schema}.${table}`,
        currentSize: stats.total_size,
        tableSize: stats.table_size,
        indexSize: stats.index_size,
        rowCount,
        status: 'recommended',
        candidates,
        recommendation,
        alternativeStrategies: candidates.slice(1, 3).map(c => ({
            strategy: c.strategy,
            partitionKey: c.column,
            rationale: getStrategyRationale(c)
        })),
        warnings: [
            'Partitioning requires table recreation - plan for downtime',
            'Ensure application queries include partition key in WHERE clauses',
            'Consider impact on existing indexes and constraints'
        ]
    };
}
|
|
426
|
+
/**
 * Build the recommendation object for the highest-scoring partitioning candidate.
 *
 * @param {string} schema Schema name used in the generated DDL.
 * @param {string} table Table name used in the generated DDL.
 * @param {object} candidate Candidate with `strategy` ('range' | 'hash' | 'list'),
 *   `column`, and `distinctMonths` (range) or `cardinality` (list).
 * @param {number} rowCount Estimated live rows; used for the migration duration estimate.
 * @param {number} totalBytes Total on-disk size of the table in bytes.
 * @param {string} targetPartitionSize Desired partition size, e.g. "1GB".
 * @returns {object|null} Recommendation, or null when the candidate has no usable
 *   strategy (including a range candidate with zero distinct months).
 */
function generatePartitionRecommendation(schema, table, candidate, rowCount, totalBytes, targetPartitionSize) {
    const targetBytes = parseTargetSize(targetPartitionSize);
    const partitionCount = Math.ceil(totalBytes / targetBytes);
    if (candidate.strategy === 'range' && candidate.distinctMonths) {
        const interval = candidate.distinctMonths > 24 ? 'monthly' : 'quarterly';
        return {
            strategy: 'range',
            partitionKey: candidate.column,
            interval,
            rationale: [
                `Time-series data spanning ${candidate.distinctMonths} months`,
                'Enables partition pruning for date-filtered queries',
                'Easy archival of old data by dropping partitions'
            ],
            benefits: [
                'Query performance: 10-100x for time-filtered queries',
                'Maintenance: VACUUM/ANALYZE per partition',
                'Archival: DROP old partitions instantly'
            ],
            migrationPlan: {
                steps: [
                    '1. Create partitioned table with identical schema',
                    '2. Create partitions covering data range',
                    '3. Copy data in batches during maintenance window',
                    '4. Recreate indexes on partitioned table',
                    '5. Rename tables to swap',
                    '6. Drop old table after verification'
                ],
                estimatedDuration: rowCount > 10000000 ? '2-8 hours' : '30 min - 2 hours',
                downtime: '5-15 minutes for final swap'
            },
            ddl: generateRangePartitionDDL(schema, table, candidate.column, interval)
        };
    }
    if (candidate.strategy === 'hash') {
        // Clamp to 2..16: a table smaller than the target size (or an unparsable size)
        // would otherwise yield 0, 1 or NaN partitions and produce empty or pointless DDL.
        const hashPartitions = Number.isNaN(partitionCount)
            ? 2
            : Math.min(Math.max(partitionCount, 2), 16);
        return {
            strategy: 'hash',
            partitionKey: candidate.column,
            partitions: hashPartitions,
            rationale: [
                'Even data distribution across partitions',
                'Good for parallel query execution',
                'No need to manage partition boundaries'
            ],
            ddl: generateHashPartitionDDL(schema, table, candidate.column, hashPartitions)
        };
    }
    if (candidate.strategy === 'list') {
        return {
            strategy: 'list',
            partitionKey: candidate.column,
            rationale: [
                `Low cardinality column (${candidate.cardinality} distinct values)`,
                'Natural data segregation',
                'Easy to add new partitions for new values'
            ],
            ddl: generateListPartitionDDL(schema, table, candidate.column)
        };
    }
    return null;
}
|
|
487
|
+
/**
 * Convert a human-readable size such as "1GB", "512MB", "1.5 GB" or "2TB" into bytes.
 *
 * Units are binary multiples (1KB = 1024 bytes) and case-insensitive; a bare number is
 * interpreted as gigabytes. Anything that cannot be parsed, is not a string, or does
 * not describe a positive size falls back to 1GB so callers can safely divide by the result.
 *
 * @param {string} size Size string.
 * @returns {number} Size in bytes (always a positive integer).
 */
function parseTargetSize(size) {
    const defaultBytes = 1024 ** 3; // 1GB
    const bytesPerUnit = {
        KB: 1024,
        MB: 1024 ** 2,
        GB: 1024 ** 3,
        TB: 1024 ** 4
    };
    if (typeof size !== 'string') {
        return defaultBytes;
    }
    const match = size.trim().match(/^(\d+(?:\.\d+)?)\s*(TB|GB|MB|KB)?$/i);
    if (!match) {
        return defaultBytes;
    }
    const unit = (match[2] ?? 'GB').toUpperCase();
    const bytes = Math.round(Number.parseFloat(match[1]) * bytesPerUnit[unit]);
    // A zero size would make partition-count maths divide by zero.
    return Number.isFinite(bytes) && bytes > 0 ? bytes : defaultBytes;
}
|
|
500
|
+
/**
 * One-line justification for an alternative partitioning strategy.
 *
 * @param {object} candidate Candidate with a `strategy` and, for list
 *   candidates, a `cardinality`.
 * @returns {string} Rationale text; empty string for an unrecognized strategy.
 */
function getStrategyRationale(candidate) {
    const kind = candidate.strategy;
    if (kind === 'range') {
        return 'Good for time-series queries with date filtering';
    }
    if (kind === 'hash') {
        return 'Good for parallel queries across all data';
    }
    if (kind === 'list') {
        return `Natural grouping by ${candidate.cardinality} distinct values`;
    }
    return '';
}
|
|
508
|
+
/**
 * Produce example DDL (one string per line) for converting a table to RANGE partitioning.
 *
 * The example partition bounds follow `interval`: 'quarterly' yields three-month
 * partitions; any other value yields the one-month example.
 *
 * @param {string} schema Schema name, inserted as given.
 * @param {string} table Table name, inserted as given.
 * @param {string} column Partition key column.
 * @param {string} interval 'monthly' or 'quarterly'.
 * @returns {string[]} DDL lines.
 */
function generateRangePartitionDDL(schema, table, column, interval) {
    // Example partitions: [name suffix, lower bound, upper bound].
    const examples = interval === 'quarterly'
        ? [['2024_q1', '2024-01-01', '2024-04-01'], ['2024_q2', '2024-04-01', '2024-07-01']]
        : [['2024_01', '2024-01-01', '2024-02-01'], ['2024_02', '2024-02-01', '2024-03-01']];
    const [first, second] = examples;
    return [
        `-- Create partitioned table`,
        `CREATE TABLE ${schema}.${table}_partitioned (`,
        ` -- Copy columns from original table`,
        ` LIKE ${schema}.${table} INCLUDING ALL`,
        `) PARTITION BY RANGE (${column});`,
        ``,
        `-- Create partitions (example for ${interval} intervals)`,
        `CREATE TABLE ${schema}.${table}_${first[0]} PARTITION OF ${schema}.${table}_partitioned`,
        ` FOR VALUES FROM ('${first[1]}') TO ('${first[2]}');`,
        ``,
        `-- Add more partitions as needed`,
        `-- CREATE TABLE ${schema}.${table}_${second[0]} PARTITION OF ${schema}.${table}_partitioned`,
        `-- FOR VALUES FROM ('${second[1]}') TO ('${second[2]}');`
    ];
}
|
|
525
|
+
/**
 * Produce DDL (one string per line) for converting a table to HASH partitioning:
 * the partitioned parent followed by one child table per remainder.
 *
 * @param {string} schema Schema name, inserted as given.
 * @param {string} table Table name, inserted as given.
 * @param {string} column Partition key column.
 * @param {number} partitions Number of hash partitions (used as the modulus).
 * @returns {string[]} DDL lines.
 */
function generateHashPartitionDDL(schema, table, column, partitions) {
    const source = `${schema}.${table}`;
    const parent = `${source}_partitioned`;
    const statements = [];
    statements.push(
        `-- Create partitioned table`,
        `CREATE TABLE ${parent} (`,
        ` LIKE ${source} INCLUDING ALL`,
        `) PARTITION BY HASH (${column});`,
        ``
    );
    let remainder = 0;
    while (remainder < partitions) {
        statements.push(
            `CREATE TABLE ${source}_p${remainder} PARTITION OF ${parent}`,
            ` FOR VALUES WITH (MODULUS ${partitions}, REMAINDER ${remainder});`
        );
        remainder += 1;
    }
    return statements;
}
|
|
539
|
+
/**
 * Produce template DDL (one string per line) for converting a table to LIST
 * partitioning. Only the parent table statement is live; the per-value
 * partitions are emitted as commented-out examples for the operator to fill in.
 *
 * @param {string} schema Schema name, inserted as given.
 * @param {string} table Table name, inserted as given.
 * @param {string} column Partition key column.
 * @returns {string[]} DDL lines.
 */
function generateListPartitionDDL(schema, table, column) {
    const source = `${schema}.${table}`;
    const parent = `${source}_partitioned`;
    const parentTable = [
        `-- Create partitioned table`,
        `CREATE TABLE ${parent} (`,
        ` LIKE ${source} INCLUDING ALL`,
        `) PARTITION BY LIST (${column});`
    ];
    const discovery = [
        `-- Create partitions for each distinct value`,
        `-- First, identify distinct values:`,
        `-- SELECT DISTINCT ${column} FROM ${source};`
    ];
    const example = [
        `-- Then create partitions:`,
        `-- CREATE TABLE ${source}_value1 PARTITION OF ${parent}`,
        `-- FOR VALUES IN ('value1');`
    ];
    // Sections are separated by a single empty line.
    return [...parentTable, ``, ...discovery, ``, ...example];
}
|
|
555
|
+
export async function detectAnomalies(connection, logger, args) {
|
|
556
|
+
const { type, schema, table, timeWindow, sensitivityLevel, zScoreThreshold } = args;
|
|
557
|
+
logger.info('detectAnomalies', 'Detecting anomalies', { type, schema, timeWindow });
|
|
558
|
+
const anomalies = [];
|
|
559
|
+
const sensitivityMultiplier = sensitivityLevel === 'high' ? 0.5 : sensitivityLevel === 'low' ? 2 : 1;
|
|
560
|
+
const threshold = zScoreThreshold * sensitivityMultiplier;
|
|
561
|
+
// Parse time window
|
|
562
|
+
const windowMatch = timeWindow.match(/^(\d+)(h|d|w|m)$/);
|
|
563
|
+
const windowHours = windowMatch
|
|
564
|
+
? parseInt(windowMatch[1], 10) * (windowMatch[2] === 'h' ? 1 :
|
|
565
|
+
windowMatch[2] === 'd' ? 24 :
|
|
566
|
+
windowMatch[2] === 'w' ? 168 :
|
|
567
|
+
windowMatch[2] === 'm' ? 720 : 24)
|
|
568
|
+
: 24;
|
|
569
|
+
// Query performance anomalies
|
|
570
|
+
if (type === 'all' || type === 'query_performance') {
|
|
571
|
+
const queryAnomaliesQuery = `
|
|
572
|
+
WITH query_stats AS (
|
|
573
|
+
SELECT
|
|
574
|
+
queryid,
|
|
575
|
+
query,
|
|
576
|
+
calls,
|
|
577
|
+
mean_exec_time,
|
|
578
|
+
stddev_exec_time,
|
|
579
|
+
total_exec_time,
|
|
580
|
+
rows
|
|
581
|
+
FROM pg_stat_statements
|
|
582
|
+
WHERE calls > 10
|
|
583
|
+
),
|
|
584
|
+
stats_analysis AS (
|
|
585
|
+
SELECT
|
|
586
|
+
*,
|
|
587
|
+
AVG(mean_exec_time) OVER() as global_avg,
|
|
588
|
+
STDDEV(mean_exec_time) OVER() as global_stddev
|
|
589
|
+
FROM query_stats
|
|
590
|
+
)
|
|
591
|
+
SELECT
|
|
592
|
+
queryid,
|
|
593
|
+
LEFT(query, 200) as query_fragment,
|
|
594
|
+
calls,
|
|
595
|
+
ROUND(mean_exec_time::numeric, 2) as mean_time_ms,
|
|
596
|
+
ROUND(total_exec_time::numeric, 2) as total_time_ms,
|
|
597
|
+
ROUND(global_avg::numeric, 2) as avg_mean_time,
|
|
598
|
+
ROUND(global_stddev::numeric, 2) as stddev_time,
|
|
599
|
+
CASE
|
|
600
|
+
WHEN global_stddev = 0 THEN 0
|
|
601
|
+
ELSE ROUND(((mean_exec_time - global_avg) / global_stddev)::numeric, 2)
|
|
602
|
+
END as z_score
|
|
603
|
+
FROM stats_analysis
|
|
604
|
+
WHERE global_stddev > 0
|
|
605
|
+
AND ABS((mean_exec_time - global_avg) / global_stddev) > ${threshold}
|
|
606
|
+
ORDER BY ABS((mean_exec_time - global_avg) / global_stddev) DESC
|
|
607
|
+
LIMIT 20
|
|
608
|
+
`;
|
|
609
|
+
try {
|
|
610
|
+
const result = await executeInternalQuery(connection, logger, { query: queryAnomaliesQuery, params: [] });
|
|
611
|
+
for (const row of result.rows) {
|
|
612
|
+
const zScore = parseFloat(row.z_score || '0');
|
|
613
|
+
const severity = Math.abs(zScore) > 4 ? 'critical' : Math.abs(zScore) > 3 ? 'high' : 'medium';
|
|
614
|
+
anomalies.push({
|
|
615
|
+
type: 'query_performance',
|
|
616
|
+
severity,
|
|
617
|
+
description: zScore > 0 ? 'Unusually slow query' : 'Unusually fast query',
|
|
618
|
+
details: {
|
|
619
|
+
query: row.query_fragment,
|
|
620
|
+
meanTimeMs: row.mean_time_ms,
|
|
621
|
+
avgMeanTimeMs: row.avg_mean_time,
|
|
622
|
+
zScore: row.z_score,
|
|
623
|
+
calls: parseInt(row.calls || '0', 10),
|
|
624
|
+
totalTimeMs: row.total_time_ms
|
|
625
|
+
},
|
|
626
|
+
recommendations: zScore > 0 ? [
|
|
627
|
+
'Check for missing indexes',
|
|
628
|
+
'Analyze query plan with EXPLAIN ANALYZE',
|
|
629
|
+
'Review recent schema or data changes'
|
|
630
|
+
] : [
|
|
631
|
+
'Query is performing better than average',
|
|
632
|
+
'Consider using similar patterns elsewhere'
|
|
633
|
+
]
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
catch {
|
|
638
|
+
// pg_stat_statements might not be available
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
// Connection anomalies
|
|
642
|
+
if (type === 'all' || type === 'connections') {
|
|
643
|
+
const connectionQuery = `
|
|
644
|
+
SELECT
|
|
645
|
+
state,
|
|
646
|
+
COUNT(*) as count,
|
|
647
|
+
MAX(EXTRACT(EPOCH FROM (NOW() - backend_start))) as max_duration_sec
|
|
648
|
+
FROM pg_stat_activity
|
|
649
|
+
WHERE backend_type = 'client backend'
|
|
650
|
+
GROUP BY state
|
|
651
|
+
`;
|
|
652
|
+
const maxConnectionsQuery = `
|
|
653
|
+
SELECT setting::int as max_connections
|
|
654
|
+
FROM pg_settings
|
|
655
|
+
WHERE name = 'max_connections'
|
|
656
|
+
`;
|
|
657
|
+
const [connResult, maxResult] = await Promise.all([
|
|
658
|
+
executeInternalQuery(connection, logger, { query: connectionQuery, params: [] }),
|
|
659
|
+
executeInternalQuery(connection, logger, { query: maxConnectionsQuery, params: [] })
|
|
660
|
+
]);
|
|
661
|
+
const maxConnections = parseInt(maxResult.rows[0]?.max_connections || '100', 10);
|
|
662
|
+
let totalConnections = 0;
|
|
663
|
+
let idleInTransaction = 0;
|
|
664
|
+
for (const row of connResult.rows) {
|
|
665
|
+
const count = parseInt(row.count || '0', 10);
|
|
666
|
+
totalConnections += count;
|
|
667
|
+
if (row.state === 'idle in transaction') {
|
|
668
|
+
idleInTransaction = count;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
const connectionRatio = totalConnections / maxConnections;
|
|
672
|
+
if (connectionRatio > 0.8) {
|
|
673
|
+
anomalies.push({
|
|
674
|
+
type: 'connections',
|
|
675
|
+
severity: connectionRatio > 0.95 ? 'critical' : 'high',
|
|
676
|
+
description: 'High connection usage',
|
|
677
|
+
details: {
|
|
678
|
+
currentConnections: totalConnections,
|
|
679
|
+
maxConnections,
|
|
680
|
+
usagePercent: (connectionRatio * 100).toFixed(1) + '%'
|
|
681
|
+
},
|
|
682
|
+
recommendations: [
|
|
683
|
+
'Review connection pooling configuration',
|
|
684
|
+
'Check for connection leaks',
|
|
685
|
+
'Consider increasing max_connections'
|
|
686
|
+
]
|
|
687
|
+
});
|
|
688
|
+
}
|
|
689
|
+
if (idleInTransaction > 5) {
|
|
690
|
+
anomalies.push({
|
|
691
|
+
type: 'connections',
|
|
692
|
+
severity: idleInTransaction > 20 ? 'high' : 'medium',
|
|
693
|
+
description: 'Idle in transaction connections',
|
|
694
|
+
details: {
|
|
695
|
+
idleInTransaction,
|
|
696
|
+
issue: 'Connections holding transactions without activity'
|
|
697
|
+
},
|
|
698
|
+
recommendations: [
|
|
699
|
+
'Review application transaction handling',
|
|
700
|
+
'Set idle_in_transaction_session_timeout',
|
|
701
|
+
'Check for uncommitted transactions'
|
|
702
|
+
]
|
|
703
|
+
});
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
// Table bloat anomalies
|
|
707
|
+
if (type === 'all' || type === 'data_volume') {
|
|
708
|
+
const sanitizedSchema = sanitizeIdentifier(schema);
|
|
709
|
+
const tableFilter = table ? `AND relname = '${sanitizeIdentifier(table)}'` : '';
|
|
710
|
+
const bloatQuery = `
|
|
711
|
+
SELECT
|
|
712
|
+
schemaname,
|
|
713
|
+
relname as table_name,
|
|
714
|
+
n_live_tup as live_tuples,
|
|
715
|
+
n_dead_tup as dead_tuples,
|
|
716
|
+
CASE
|
|
717
|
+
WHEN n_live_tup + n_dead_tup = 0 THEN 0
|
|
718
|
+
ELSE ROUND((n_dead_tup::numeric / (n_live_tup + n_dead_tup)) * 100, 2)
|
|
719
|
+
END as dead_percent,
|
|
720
|
+
last_vacuum,
|
|
721
|
+
last_autovacuum
|
|
722
|
+
FROM pg_stat_user_tables
|
|
723
|
+
WHERE schemaname = '${sanitizedSchema}'
|
|
724
|
+
${tableFilter}
|
|
725
|
+
AND n_dead_tup > 10000
|
|
726
|
+
ORDER BY n_dead_tup DESC
|
|
727
|
+
LIMIT 20
|
|
728
|
+
`;
|
|
729
|
+
const bloatResult = await executeInternalQuery(connection, logger, { query: bloatQuery, params: [] });
|
|
730
|
+
for (const row of bloatResult.rows) {
|
|
731
|
+
const deadPercent = parseFloat(row.dead_percent || '0');
|
|
732
|
+
if (deadPercent > 30) {
|
|
733
|
+
anomalies.push({
|
|
734
|
+
type: 'data_volume',
|
|
735
|
+
severity: deadPercent > 50 ? 'critical' : 'high',
|
|
736
|
+
description: 'High dead tuple ratio',
|
|
737
|
+
details: {
|
|
738
|
+
table: `${row.schemaname}.${row.table_name}`,
|
|
739
|
+
liveTuples: parseInt(row.live_tuples || '0', 10),
|
|
740
|
+
deadTuples: parseInt(row.dead_tuples || '0', 10),
|
|
741
|
+
deadPercent: deadPercent + '%',
|
|
742
|
+
lastVacuum: row.last_vacuum || 'never',
|
|
743
|
+
lastAutovacuum: row.last_autovacuum || 'never'
|
|
744
|
+
},
|
|
745
|
+
recommendations: [
|
|
746
|
+
`VACUUM ANALYZE ${row.schemaname}.${row.table_name};`,
|
|
747
|
+
'Review autovacuum settings',
|
|
748
|
+
'Consider VACUUM FULL for severe bloat (requires exclusive lock)'
|
|
749
|
+
]
|
|
750
|
+
});
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
// Error rate anomalies (from pg_stat_database)
|
|
755
|
+
if (type === 'all' || type === 'errors') {
|
|
756
|
+
const errorQuery = `
|
|
757
|
+
SELECT
|
|
758
|
+
datname,
|
|
759
|
+
xact_commit,
|
|
760
|
+
xact_rollback,
|
|
761
|
+
CASE
|
|
762
|
+
WHEN xact_commit + xact_rollback = 0 THEN 0
|
|
763
|
+
ELSE ROUND((xact_rollback::numeric / (xact_commit + xact_rollback)) * 100, 2)
|
|
764
|
+
END as rollback_percent,
|
|
765
|
+
conflicts,
|
|
766
|
+
deadlocks
|
|
767
|
+
FROM pg_stat_database
|
|
768
|
+
WHERE datname = current_database()
|
|
769
|
+
`;
|
|
770
|
+
const errorResult = await executeInternalQuery(connection, logger, { query: errorQuery, params: [] });
|
|
771
|
+
if (errorResult.rows.length > 0) {
|
|
772
|
+
const row = errorResult.rows[0];
|
|
773
|
+
const rollbackPercent = parseFloat(row.rollback_percent || '0');
|
|
774
|
+
const deadlocks = parseInt(row.deadlocks || '0', 10);
|
|
775
|
+
if (rollbackPercent > 5) {
|
|
776
|
+
anomalies.push({
|
|
777
|
+
type: 'errors',
|
|
778
|
+
severity: rollbackPercent > 20 ? 'critical' : rollbackPercent > 10 ? 'high' : 'medium',
|
|
779
|
+
description: 'High transaction rollback rate',
|
|
780
|
+
details: {
|
|
781
|
+
commits: parseInt(row.xact_commit || '0', 10),
|
|
782
|
+
rollbacks: parseInt(row.xact_rollback || '0', 10),
|
|
783
|
+
rollbackPercent: rollbackPercent + '%'
|
|
784
|
+
},
|
|
785
|
+
recommendations: [
|
|
786
|
+
'Review application error handling',
|
|
787
|
+
'Check for constraint violations',
|
|
788
|
+
'Analyze transaction patterns'
|
|
789
|
+
]
|
|
790
|
+
});
|
|
791
|
+
}
|
|
792
|
+
if (deadlocks > 0) {
|
|
793
|
+
anomalies.push({
|
|
794
|
+
type: 'errors',
|
|
795
|
+
severity: deadlocks > 10 ? 'high' : 'medium',
|
|
796
|
+
description: 'Deadlocks detected',
|
|
797
|
+
details: {
|
|
798
|
+
deadlockCount: deadlocks
|
|
799
|
+
},
|
|
800
|
+
recommendations: [
|
|
801
|
+
'Review transaction lock ordering',
|
|
802
|
+
'Check for long-running transactions',
|
|
803
|
+
'Consider using advisory locks'
|
|
804
|
+
]
|
|
805
|
+
});
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
// Sort by severity
|
|
810
|
+
const severityOrder = { critical: 0, high: 1, medium: 2, low: 3 };
|
|
811
|
+
anomalies.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);
|
|
812
|
+
return {
|
|
813
|
+
timeWindow,
|
|
814
|
+
sensitivityLevel,
|
|
815
|
+
anomaliesFound: anomalies.length,
|
|
816
|
+
anomalies,
|
|
817
|
+
summary: {
|
|
818
|
+
bySeverity: {
|
|
819
|
+
critical: anomalies.filter(a => a.severity === 'critical').length,
|
|
820
|
+
high: anomalies.filter(a => a.severity === 'high').length,
|
|
821
|
+
medium: anomalies.filter(a => a.severity === 'medium').length,
|
|
822
|
+
low: anomalies.filter(a => a.severity === 'low').length
|
|
823
|
+
},
|
|
824
|
+
byType: {
|
|
825
|
+
query_performance: anomalies.filter(a => a.type === 'query_performance').length,
|
|
826
|
+
connections: anomalies.filter(a => a.type === 'connections').length,
|
|
827
|
+
data_volume: anomalies.filter(a => a.type === 'data_volume').length,
|
|
828
|
+
errors: anomalies.filter(a => a.type === 'errors').length
|
|
829
|
+
},
|
|
830
|
+
healthStatus: anomalies.some(a => a.severity === 'critical') ? 'critical' :
|
|
831
|
+
anomalies.some(a => a.severity === 'high') ? 'attention_needed' :
|
|
832
|
+
anomalies.length > 0 ? 'minor_issues' : 'healthy'
|
|
833
|
+
}
|
|
834
|
+
};
|
|
835
|
+
}
|
|
836
|
+
/**
 * Inspect the execution plan of a SELECT statement and derive optimization hints.
 *
 * The query is wrapped in `EXPLAIN (... FORMAT JSON)`. Outside read-only mode the
 * ANALYZE option is used, which means the query is actually executed; in
 * read-only mode only the planner's estimates are requested.
 *
 * @param {object} connection - Connection handle passed through to
 *   `executeInternalQuery`; `connection.config.mode` is compared to `'read-only'`.
 * @param {object} logger - Logger exposing `info(scope, message)` and
 *   `warn(scope, message)`.
 * @param {object} args
 * @param {string} args.query - SQL text; must start with `select` (case-insensitive).
 * @param {boolean} [args.includeRewrite] - Emit query-rewrite suggestions
 *   (SELECT *, missing LIMIT, OR conditions).
 * @param {boolean} [args.includeIndexes] - Emit index suggestions for
 *   sequential-scan issues.
 * @param {number} [args.targetTimeMs] - Optional time budget compared against the
 *   measured total time (only available with ANALYZE).
 * @returns {Promise<object>} Either `{ error, query }` when the query is rejected
 *   or the plan cannot be obtained, or a report containing `executionPlan`,
 *   `issues`, `optimizations`, `estimatedOptimizedTime` and `summary`.
 */
export async function optimizeQuery(connection, logger, args) {
    const { query, includeRewrite, includeIndexes, targetTimeMs } = args;
    logger.info('optimizeQuery', 'Analyzing query for optimization');
    assertNoSensitiveCatalogAccess(query);
    // Validate query is a SELECT (we only optimize read queries)
    const trimmedQuery = query.trim().toLowerCase();
    if (!trimmedQuery.startsWith('select')) {
        return {
            error: 'Only SELECT queries can be optimized',
            query: query.substring(0, 100)
        };
    }
    // Get execution plan — skip ANALYZE in read-only mode (it executes the query).
    // BUFFERS is dropped together with ANALYZE: PostgreSQL releases before 13
    // reject BUFFERS without ANALYZE, and nothing below reads buffer statistics.
    const isReadOnly = connection.config.mode === 'read-only';
    const explainOptions = isReadOnly ? 'FORMAT JSON' : 'ANALYZE, BUFFERS, FORMAT JSON';
    const explainQuery = `EXPLAIN (${explainOptions}) ${query}`;
    if (isReadOnly) {
        logger.warn('optimizeQuery', 'Read-only mode: ANALYZE disabled, using estimated plan only');
    }
    let planResult;
    try {
        planResult = await executeInternalQuery(connection, logger, { query: explainQuery, params: [] });
    }
    catch (error) {
        return {
            error: `Query execution failed: ${error instanceof Error ? error.message : String(error)}`,
            query: query.substring(0, 200)
        };
    }
    // Guard against an empty or unexpectedly shaped EXPLAIN result instead of
    // letting a TypeError escape from the property accesses below.
    const plan = planResult?.rows?.[0]?.['QUERY PLAN']?.[0];
    if (!plan || !plan.Plan) {
        return {
            error: 'Query execution failed: EXPLAIN returned no plan',
            query: query.substring(0, 200)
        };
    }
    // 'Execution Time' is only reported when the plan was produced with ANALYZE.
    const hasAnalyze = plan['Execution Time'] !== undefined;
    const planningTime = plan['Planning Time'] ?? null;
    const executionTime = plan['Execution Time'] ?? null;
    const totalTime = (planningTime != null && executionTime != null)
        ? planningTime + executionTime
        : null;
    const issues = [];
    const optimizations = [];
    // Analyze the plan recursively — use Plan Rows as fallback when Actual Rows unavailable
    analyzeNode(plan.Plan, issues, 0, hasAnalyze);
    // Generate optimizations based on issues
    let priority = 1;
    for (const issue of issues) {
        if (issue.type === 'sequential_scan' && includeIndexes) {
            optimizations.push({
                type: 'add_index',
                priority: priority++,
                description: `Add index on ${issue.table} for filter columns`,
                table: issue.table,
                impact: issue.impact,
                details: issue.description,
                notes: [
                    'Analyze the filter condition to determine which columns need indexing',
                    'Use EXPLAIN to identify the filter columns',
                    'Consider composite index if multiple columns are filtered'
                ]
            });
        }
        if (issue.type === 'sort_memory') {
            optimizations.push({
                type: 'increase_work_mem',
                priority: priority++,
                description: 'Increase work_mem to avoid disk sorts',
                impact: issue.impact,
                sql: `SET work_mem = '256MB'; -- or adjust in postgresql.conf`,
                notes: [
                    'Be careful with high work_mem on systems with many connections',
                    'Consider setting per-session for specific queries'
                ]
            });
        }
        if (issue.type === 'nested_loop' && issue.rows > 1000) {
            optimizations.push({
                type: 'query_rewrite',
                priority: priority++,
                description: 'Consider rewriting nested loop join',
                impact: issue.impact,
                details: `Nested loop processing ${issue.rows} rows`,
                notes: [
                    'Check if hash join or merge join would be better',
                    'Ensure join columns have indexes',
                    'Consider query restructuring'
                ]
            });
        }
    }
    // Query rewrite suggestions. Keyword checks are whitespace-tolerant so that
    // queries formatted with tabs/newlines are recognized, and word-bounded so
    // that identifiers such as `rate_limit` are not mistaken for keywords.
    if (includeRewrite) {
        // Check for SELECT *
        if (/\bselect\s+\*/i.test(query)) {
            optimizations.push({
                type: 'query_rewrite',
                priority: priority++,
                description: 'Replace SELECT * with specific columns',
                rationale: 'Reduces I/O and memory usage, enables index-only scans',
                before: 'SELECT * FROM ...',
                after: 'SELECT column1, column2, ... FROM ...',
                impact: 'medium'
            });
        }
        // Check for missing LIMIT
        const rowCount = plan.Plan['Actual Rows'] ?? plan.Plan['Plan Rows'] ?? 0;
        if (!/\blimit\b/i.test(query) && rowCount > 1000) {
            optimizations.push({
                type: 'query_rewrite',
                priority: priority++,
                description: 'Add LIMIT clause',
                rationale: hasAnalyze
                    ? `Query returned ${rowCount} rows`
                    : `Query estimated to return ${rowCount} rows`,
                notes: [
                    'If you only need a subset of results, add LIMIT',
                    'Consider pagination for large result sets'
                ],
                impact: 'low'
            });
        }
        // Check for inefficient OR conditions
        if (/\sor\s/i.test(query)) {
            optimizations.push({
                type: 'query_rewrite',
                priority: priority++,
                description: 'Consider replacing OR with UNION',
                rationale: 'OR conditions can prevent index usage',
                before: 'SELECT ... WHERE a = 1 OR b = 2',
                after: 'SELECT ... WHERE a = 1 UNION ALL SELECT ... WHERE b = 2',
                notes: ['Only beneficial if each condition can use an index'],
                impact: 'medium'
            });
        }
    }
    // Sort optimizations by priority
    optimizations.sort((a, b) => a.priority - b.priority);
    // Estimate optimized time (only meaningful with ANALYZE data).
    // Each optimization scales the estimate by a fixed factor for its impact level.
    let estimatedOptimizedTime = totalTime;
    if (totalTime != null) {
        for (const opt of optimizations) {
            if (opt.impact === 'critical')
                estimatedOptimizedTime *= 0.1;
            else if (opt.impact === 'high')
                estimatedOptimizedTime *= 0.3;
            else if (opt.impact === 'medium')
                estimatedOptimizedTime *= 0.7;
        }
    }
    // null means "unknown": no target supplied or no measured timing available.
    const meetsTarget = (targetTimeMs && totalTime != null) ? totalTime <= targetTimeMs : null;
    const result = {
        query: query.length > 500 ? query.substring(0, 500) + '...' : query,
        executionPlan: {
            planningTime: planningTime != null ? planningTime.toFixed(2) + 'ms' : 'N/A',
            executionTime: executionTime != null ? executionTime.toFixed(2) + 'ms' : 'N/A (ANALYZE unavailable in read-only mode)',
            totalTime: totalTime != null ? totalTime.toFixed(2) + 'ms' : 'N/A'
        },
        targetTimeMs: targetTimeMs || null,
        meetsTarget,
        issues,
        optimizations,
        estimatedOptimizedTime: estimatedOptimizedTime != null
            ? estimatedOptimizedTime.toFixed(2) + 'ms'
            : 'N/A',
        summary: {
            issuesFound: issues.length,
            optimizationsAvailable: optimizations.length,
            criticalIssues: issues.filter(i => i.impact === 'critical').length,
            highImpactIssues: issues.filter(i => i.impact === 'high').length
        }
    };
    if (isReadOnly) {
        result.estimatedPlanOnly = true;
        result.note = 'Read-only mode: EXPLAIN ANALYZE unavailable. Timing and row counts are estimated from the query planner, not actual execution.';
    }
    return result;
}
|
|
1009
|
+
/**
 * Walk an EXPLAIN (FORMAT JSON) plan node and its children, appending any
 * detected problems to `issues`.
 *
 * Detected issue types: `sequential_scan`, `sort_memory`, `nested_loop`,
 * `estimate_error` (only when `hasAnalyze` is true) and `hash_memory`.
 *
 * @param {object|null|undefined} node - Plan node; falsy values are ignored.
 * @param {Array<object>} issues - Output array, mutated in place.
 * @param {number} depth - Nesting level of `node`; incremented for children.
 * @param {boolean} [hasAnalyze=true] - Whether the plan carries actual
 *   execution data; controls wording and the estimate-error check.
 * @returns {void}
 */
function analyzeNode(node, issues, depth, hasAnalyze = true) {
    if (!node)
        return;
    const nodeType = node['Node Type'];
    const actualRows = node['Actual Rows'];
    const planRows = node['Plan Rows'] || 0;
    // Prefer measured row counts; fall back to the planner estimate.
    const rows = actualRows ?? planRows;
    // Check for sequential scans on large tables
    if (nodeType === 'Seq Scan' && rows > 1000) {
        const impact = rows > 100000 ? 'critical' : rows > 10000 ? 'high' : 'medium';
        const rowLabel = hasAnalyze ? 'reading' : 'estimated';
        issues.push({
            type: 'sequential_scan',
            table: node['Relation Name'],
            rows,
            impact,
            description: `Sequential scan on ${node['Relation Name']} ${rowLabel} ${rows.toLocaleString()} rows`
        });
    }
    // Check for sorts spilling to disk
    if (nodeType === 'Sort' && node['Sort Method']?.includes('external')) {
        issues.push({
            type: 'sort_memory',
            impact: 'high',
            description: `Sort operation spilling to disk (${node['Sort Space Used']} kB)`
        });
    }
    // Check for nested loops with high row counts
    if (nodeType === 'Nested Loop' && rows > 1000) {
        issues.push({
            type: 'nested_loop',
            rows,
            impact: rows > 10000 ? 'high' : 'medium',
            description: `Nested loop join processing ${rows.toLocaleString()} rows`
        });
    }
    // Check for bad row estimates (can indicate stale statistics) — only with ANALYZE
    if (hasAnalyze && planRows > 0 && actualRows > 0) {
        const ratio = actualRows / planRows;
        if (ratio > 10 || ratio < 0.1) {
            // Report the magnitude of the miss in either direction; printing the
            // raw ratio would show over-estimates as "0.0x".
            const factor = ratio >= 1 ? ratio : 1 / ratio;
            issues.push({
                type: 'estimate_error',
                impact: 'medium',
                description: `Row estimate off by ${factor.toFixed(1)}x (planned: ${planRows}, actual: ${actualRows})`,
                recommendation: `ANALYZE the affected table to update statistics`
            });
        }
    }
    // Check for hash operations using too much memory
    if (nodeType === 'Hash' && node['Peak Memory Usage']) {
        const memoryKB = node['Peak Memory Usage'];
        if (memoryKB > 100000) { // > 100MB
            issues.push({
                type: 'hash_memory',
                impact: 'medium',
                description: `Hash operation using ${(memoryKB / 1024).toFixed(0)} MB`
            });
        }
    }
    // Recursively analyze child nodes
    if (node.Plans) {
        for (const childNode of node.Plans) {
            analyzeNode(childNode, issues, depth + 1, hasAnalyze);
        }
    }
}
|
|
1076
|
+
/**
 * Tool registry for this module: each entry pairs the schema used to validate
 * a tool's arguments with the handler function that implements it.
 */
export const optimizationTools = {
    suggestIndexes: { schema: SuggestIndexesSchema, handler: suggestIndexes },
    suggestPartitioning: { schema: SuggestPartitioningSchema, handler: suggestPartitioning },
    detectAnomalies: { schema: DetectAnomaliesSchema, handler: detectAnomalies },
    optimizeQuery: { schema: OptimizeQuerySchema, handler: optimizeQuery }
};
|
|
1094
|
+
//# sourceMappingURL=optimization.js.map
|