postgresai 0.15.0 → 0.16.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/bin/postgres-ai.ts +210 -31
- package/dist/bin/postgres-ai.js +7749 -7248
- package/lib/aas-onboard.ts +251 -0
- package/lib/checkup-api.ts +75 -0
- package/lib/checkup-summary.ts +30 -0
- package/lib/checkup.ts +227 -21
- package/lib/metrics-loader.ts +10 -8
- package/lib/util.ts +10 -3
- package/package.json +1 -1
- package/scripts/embed-metrics.ts +7 -6
- package/test/aas-onboard.test.ts +301 -0
- package/test/checkup.integration.test.ts +55 -0
- package/test/checkup.test.ts +471 -1
- package/test/mcp-server.test.ts +4 -0
- package/test/monitoring.test.ts +128 -49
- package/test/schema-validation.test.ts +29 -0
- package/test/test-utils.ts +8 -0
- package/test/util.test.ts +44 -0
package/lib/checkup.ts
CHANGED
|
@@ -243,6 +243,58 @@ export interface RedundantIndex {
|
|
|
243
243
|
redundant_to_parse_error?: string;
|
|
244
244
|
}
|
|
245
245
|
|
|
246
|
+
/**
 * Dead tuples table entry (F003) - matches F003.schema.json deadTuplesTable
 *
 * Sourced from pg_stat_user_tables live counters (n_dead_tup / n_live_tup),
 * so dead tuples that have never been vacuumed are visible - unlike the
 * statistical bloat estimators (F004/F005), which miss them entirely.
 *
 * Instances are produced by getDeadTuples(), which also precomputes the two
 * boolean flag fields at the bottom of this interface.
 */
export interface DeadTuplesTable {
  schema_name: string;
  table_name: string;
  n_live_tup: number;
  n_dead_tup: number;
  /** Dead tuples as percentage of all tuples: n_dead_tup / (n_live_tup + n_dead_tup) * 100 */
  dead_pct: number;
  /** ISO 8601 timestamp built from last_autovacuum_epoch; null when the epoch is not positive (never) */
  last_autovacuum: string | null;
  /** Epoch seconds of the last autovacuum; 0 = never */
  last_autovacuum_epoch: number;
  /** ISO 8601 timestamp built from last_vacuum_epoch; null when the epoch is not positive (never) */
  last_vacuum: string | null;
  /** Epoch seconds of the last manual vacuum; 0 = never */
  last_vacuum_epoch: number;
  autovacuum_count: number;
  vacuum_count: number;
  /** True when autovacuum is disabled per-table via reloptions (autovacuum_enabled=off/false/0/...) */
  autovacuum_disabled: boolean;
  table_size_bytes: number;
  /** table_size_bytes rendered through formatBytes() */
  table_size_pretty: string;
  /**
   * True when BOTH thresholds are met or exceeded (inclusive comparison):
   * n_dead_tup >= F003_DEAD_TUPLES_MIN and dead_pct >= F003_DEAD_PCT_MIN
   */
  exceeds_dead_tuple_thresholds: boolean;
  /**
   * True when autovacuum is disabled per-table on a non-tiny table
   * (n_live_tup + n_dead_tup >= F003_AUTOVACUUM_DISABLED_MIN_ROWS)
   */
  autovacuum_disabled_flagged: boolean;
}
|
|
277
|
+
|
|
278
|
+
/**
 * F003 thresholds.
 *
 * A table's dead-tuple accumulation is flagged only when it is high in BOTH
 * absolute and relative terms:
 *   - F003_DEAD_TUPLES_MIN keeps small/noisy tables out (100k dead tuples is
 *     real work for vacuum regardless of table size);
 *   - F003_DEAD_PCT_MIN = 20 mirrors the default autovacuum_vacuum_scale_factor
 *     of 0.2: with default settings autovacuum should have fired well before a
 *     table is 20% dead, so reaching this level in a snapshot is an unambiguous
 *     signal that vacuum is not keeping up (lagging, blocked, or disabled).
 *
 * Per-table disabled autovacuum is a classic footgun and is always flagged on
 * non-tiny tables (>= F003_AUTOVACUUM_DISABLED_MIN_ROWS total tuples; same
 * 10k-row "non-tiny" cutoff the classic postgres-checkup F003 uses).
 *
 * All three values are inclusive lower bounds: getDeadTuples() compares with
 * `>=`, so a table sitting exactly on a threshold is flagged.
 */
export const F003_DEAD_TUPLES_MIN = 100_000;
export const F003_DEAD_PCT_MIN = 20;
export const F003_AUTOVACUUM_DISABLED_MIN_ROWS = 10_000;
|
|
297
|
+
|
|
246
298
|
/**
|
|
247
299
|
* I/O statistics by backend type (I001) - matches I001.schema.json backendIOStats
|
|
248
300
|
*/
|
|
@@ -257,7 +309,7 @@ export interface BackendIOStats {
|
|
|
257
309
|
write_bytes_mb: number;
|
|
258
310
|
write_time_ms: number;
|
|
259
311
|
writebacks: number;
|
|
260
|
-
/** Writeback MiB. The historical `_mb` suffix is retained for schema compatibility. */
|
|
312
|
+
/** Writeback MiB. Always 0 on PG18+ (op_bytes removed, no writeback byte counts exposed). The historical `_mb` suffix is retained for schema compatibility. */
|
|
261
313
|
writeback_bytes_mb: number;
|
|
262
314
|
writeback_time_ms: number;
|
|
263
315
|
fsyncs: number;
|
|
@@ -361,11 +413,11 @@ function formatSettingPrettyValue(
|
|
|
361
413
|
if (settingNormalized === null || unitNormalized === null) {
|
|
362
414
|
return rawValue;
|
|
363
415
|
}
|
|
364
|
-
|
|
416
|
+
|
|
365
417
|
if (unitNormalized === "bytes") {
|
|
366
418
|
return formatBytes(settingNormalized);
|
|
367
419
|
}
|
|
368
|
-
|
|
420
|
+
|
|
369
421
|
if (unitNormalized === "seconds") {
|
|
370
422
|
// Format time values with appropriate units based on magnitude:
|
|
371
423
|
// - Sub-second values (< 1s): show in milliseconds for precision
|
|
@@ -380,7 +432,7 @@ function formatSettingPrettyValue(
|
|
|
380
432
|
return `${(settingNormalized / SECONDS_PER_MINUTE).toFixed(1)} min`;
|
|
381
433
|
}
|
|
382
434
|
}
|
|
383
|
-
|
|
435
|
+
|
|
384
436
|
return rawValue;
|
|
385
437
|
}
|
|
386
438
|
|
|
@@ -436,7 +488,7 @@ export async function getSettings(client: Client, pgMajorVersion: number = 16):
|
|
|
436
488
|
const vartype = row.tag_vartype || "";
|
|
437
489
|
const settingNormalized = row.setting_normalized !== null ? parseFloat(row.setting_normalized) : null;
|
|
438
490
|
const unitNormalized = row.unit_normalized || null;
|
|
439
|
-
|
|
491
|
+
|
|
440
492
|
settings[name] = {
|
|
441
493
|
setting: settingValue,
|
|
442
494
|
unit,
|
|
@@ -468,7 +520,7 @@ export async function getAlteredSettings(client: Client, pgMajorVersion: number
|
|
|
468
520
|
const category = row.tag_category || "";
|
|
469
521
|
const settingNormalized = row.setting_normalized !== null ? parseFloat(row.setting_normalized) : null;
|
|
470
522
|
const unitNormalized = row.unit_normalized || null;
|
|
471
|
-
|
|
523
|
+
|
|
472
524
|
settings[name] = {
|
|
473
525
|
value: settingValue,
|
|
474
526
|
unit,
|
|
@@ -737,22 +789,22 @@ export async function getStatsReset(client: Client, pgMajorVersion: number = 16)
|
|
|
737
789
|
const sql = getMetricSql(METRIC_NAMES.statsReset, pgMajorVersion);
|
|
738
790
|
const result = await client.query(sql);
|
|
739
791
|
const row = result.rows[0] || {};
|
|
740
|
-
|
|
792
|
+
|
|
741
793
|
// The stats_reset metric returns stats_reset_epoch and seconds_since_reset
|
|
742
794
|
// We need to calculate additional fields
|
|
743
795
|
const statsResetEpoch = row.stats_reset_epoch ? parseFloat(row.stats_reset_epoch) : null;
|
|
744
796
|
const secondsSinceReset = row.seconds_since_reset ? parseInt(row.seconds_since_reset, 10) : null;
|
|
745
|
-
|
|
797
|
+
|
|
746
798
|
// Calculate stats_reset_time from epoch
|
|
747
|
-
const statsResetTime = statsResetEpoch
|
|
799
|
+
const statsResetTime = statsResetEpoch
|
|
748
800
|
? new Date(statsResetEpoch * 1000).toISOString()
|
|
749
801
|
: null;
|
|
750
|
-
|
|
802
|
+
|
|
751
803
|
// Calculate days since reset
|
|
752
804
|
const daysSinceReset = secondsSinceReset !== null
|
|
753
805
|
? Math.floor(secondsSinceReset / SECONDS_PER_DAY)
|
|
754
806
|
: null;
|
|
755
|
-
|
|
807
|
+
|
|
756
808
|
// Get postmaster startup time separately (simple inline SQL)
|
|
757
809
|
// This is supplementary data - errors are captured in output, not propagated
|
|
758
810
|
let postmasterStartupEpoch: number | null = null;
|
|
@@ -765,8 +817,8 @@ export async function getStatsReset(client: Client, pgMajorVersion: number = 16)
|
|
|
765
817
|
pg_postmaster_start_time()::text as postmaster_startup_time
|
|
766
818
|
`);
|
|
767
819
|
if (pmResult.rows.length > 0) {
|
|
768
|
-
postmasterStartupEpoch = pmResult.rows[0].postmaster_startup_epoch
|
|
769
|
-
? parseFloat(pmResult.rows[0].postmaster_startup_epoch)
|
|
820
|
+
postmasterStartupEpoch = pmResult.rows[0].postmaster_startup_epoch
|
|
821
|
+
? parseFloat(pmResult.rows[0].postmaster_startup_epoch)
|
|
770
822
|
: null;
|
|
771
823
|
postmasterStartupTime = pmResult.rows[0].postmaster_startup_time || null;
|
|
772
824
|
}
|
|
@@ -775,7 +827,7 @@ export async function getStatsReset(client: Client, pgMajorVersion: number = 16)
|
|
|
775
827
|
postmasterStartupError = `Failed to query postmaster start time: ${errorMsg}`;
|
|
776
828
|
console.error(`[getStatsReset] Warning: ${postmasterStartupError}`);
|
|
777
829
|
}
|
|
778
|
-
|
|
830
|
+
|
|
779
831
|
const statsResult: StatsReset = {
|
|
780
832
|
stats_reset_epoch: statsResetEpoch,
|
|
781
833
|
stats_reset_time: statsResetTime,
|
|
@@ -783,12 +835,12 @@ export async function getStatsReset(client: Client, pgMajorVersion: number = 16)
|
|
|
783
835
|
postmaster_startup_epoch: postmasterStartupEpoch,
|
|
784
836
|
postmaster_startup_time: postmasterStartupTime,
|
|
785
837
|
};
|
|
786
|
-
|
|
838
|
+
|
|
787
839
|
// Only include error field if there was an error (keeps output clean)
|
|
788
840
|
if (postmasterStartupError) {
|
|
789
841
|
statsResult.postmaster_startup_error = postmasterStartupError;
|
|
790
842
|
}
|
|
791
|
-
|
|
843
|
+
|
|
792
844
|
return statsResult;
|
|
793
845
|
}
|
|
794
846
|
|
|
@@ -800,7 +852,7 @@ export async function getCurrentDatabaseInfo(client: Client, pgMajorVersion: num
|
|
|
800
852
|
const sql = getMetricSql(METRIC_NAMES.dbSize, pgMajorVersion);
|
|
801
853
|
const result = await client.query(sql);
|
|
802
854
|
const row = result.rows[0] || {};
|
|
803
|
-
|
|
855
|
+
|
|
804
856
|
// db_size metric returns tag_datname and size_b
|
|
805
857
|
return {
|
|
806
858
|
datname: row.tag_datname || "postgres",
|
|
@@ -831,7 +883,7 @@ export async function getRedundantIndexes(client: Client, pgMajorVersion: number
|
|
|
831
883
|
const transformed = transformMetricRow(row);
|
|
832
884
|
const indexSizeBytes = parseInt(String(transformed.index_size_bytes || 0), 10);
|
|
833
885
|
const tableSizeBytes = parseInt(String(transformed.table_size_bytes || 0), 10);
|
|
834
|
-
|
|
886
|
+
|
|
835
887
|
// Parse redundant_to JSON array (indexes that make this one redundant)
|
|
836
888
|
let redundantTo: RedundantToIndex[] = [];
|
|
837
889
|
let parseError: string | undefined;
|
|
@@ -857,7 +909,7 @@ export async function getRedundantIndexes(client: Client, pgMajorVersion: number
|
|
|
857
909
|
parseError = `Failed to parse redundant_to_json: ${errorMsg}`;
|
|
858
910
|
console.error(`[H004] Warning: ${parseError} for index "${indexName}"`);
|
|
859
911
|
}
|
|
860
|
-
|
|
912
|
+
|
|
861
913
|
const result: RedundantIndex = {
|
|
862
914
|
schema_name: String(transformed.schema_name || ""),
|
|
863
915
|
table_name: String(transformed.table_name || ""),
|
|
@@ -874,16 +926,117 @@ export async function getRedundantIndexes(client: Client, pgMajorVersion: number
|
|
|
874
926
|
table_size_pretty: formatBytes(tableSizeBytes),
|
|
875
927
|
redundant_to: redundantTo,
|
|
876
928
|
};
|
|
877
|
-
|
|
929
|
+
|
|
878
930
|
// Only include parse error field if there was an error (keeps output clean)
|
|
879
931
|
if (parseError) {
|
|
880
932
|
result.redundant_to_parse_error = parseError;
|
|
881
933
|
}
|
|
882
|
-
|
|
934
|
+
|
|
883
935
|
return result;
|
|
884
936
|
});
|
|
885
937
|
}
|
|
886
938
|
|
|
939
|
+
/**
 * Get per-table dead-tuple stats and per-table autovacuum overrides (F003).
 * SQL loaded from config/pgwatch-prometheus/metrics.yml (pg_dead_tuples metric).
 *
 * Returns tables that carry dead tuples or have autovacuum disabled per-table,
 * with threshold flags precomputed (see F003_* constants).
 *
 * Integer columns that are missing or not parseable as a number are reported
 * as 0 rather than NaN, so every numeric field of the result stays a real
 * number (NaN would serialize to `null` in the JSON report).
 *
 * @param client - Connected PostgreSQL client
 * @param pgMajorVersion - PostgreSQL major version (default: 16)
 * @returns One DeadTuplesTable entry per row returned by the metric query
 * @throws {Error} If database query fails (propagating - critical data)
 */
export async function getDeadTuples(client: Client, pgMajorVersion: number = 16): Promise<DeadTuplesTable[]> {
  // Base-10 integer parse that falls back to 0 for null/undefined/garbage input.
  const toInt = (value: unknown): number => {
    const parsed = parseInt(String(value || 0), 10);
    return Number.isFinite(parsed) ? parsed : 0;
  };
  // Epoch seconds -> ISO 8601 string; non-positive epochs mean "never".
  const toIsoOrNull = (epochSeconds: number): string | null =>
    epochSeconds > 0 ? new Date(epochSeconds * 1000).toISOString() : null;

  const sql = getMetricSql(METRIC_NAMES.F003, pgMajorVersion);
  const result = await client.query(sql);

  return result.rows.map((row) => {
    const t = transformMetricRow(row);
    const nLive = toInt(t.n_live_tup);
    const nDead = toInt(t.n_dead_tup);
    const deadPct = parseFloat(String(t.dead_pct)) || 0;
    // The metric reports last (auto)vacuum times as epoch seconds.
    const lastAutovacuumEpoch = toInt(t.last_autovacuum);
    const lastVacuumEpoch = toInt(t.last_vacuum);
    // The metric emits 0/1; be liberal in what we accept (driver may return strings)
    const autovacuumDisabled = toInt(t.autovacuum_disabled) === 1 || toBool(t.autovacuum_disabled);
    const tableSizeBytes = toInt(t.table_size_b);

    return {
      schema_name: String(t.schemaname || ""),
      table_name: String(t.relname || ""),
      n_live_tup: nLive,
      n_dead_tup: nDead,
      dead_pct: deadPct,
      last_autovacuum: toIsoOrNull(lastAutovacuumEpoch),
      last_autovacuum_epoch: lastAutovacuumEpoch,
      last_vacuum: toIsoOrNull(lastVacuumEpoch),
      last_vacuum_epoch: lastVacuumEpoch,
      autovacuum_count: toInt(t.autovacuum_count),
      vacuum_count: toInt(t.vacuum_count),
      autovacuum_disabled: autovacuumDisabled,
      table_size_bytes: tableSizeBytes,
      table_size_pretty: formatBytes(tableSizeBytes),
      // Both bounds are inclusive and both must hold.
      exceeds_dead_tuple_thresholds: nDead >= F003_DEAD_TUPLES_MIN && deadPct >= F003_DEAD_PCT_MIN,
      // Tiny tables with autovacuum off are not worth flagging.
      autovacuum_disabled_flagged: autovacuumDisabled && nLive + nDead >= F003_AUTOVACUUM_DISABLED_MIN_ROWS,
    };
  });
}
|
|
984
|
+
|
|
985
|
+
/**
 * Build concrete, human-readable conclusions and recommendations for F003.
 *
 * Exported separately so the wording (which the console surfaces verbatim in
 * auto-created issues) can be unit-tested without a database.
 *
 * Each table contributes at most one conclusion/recommendation pair, chosen
 * in this priority order:
 *   1. dead-tuple thresholds exceeded AND autovacuum disabled per-table;
 *   2. dead-tuple thresholds exceeded;
 *   3. autovacuum disabled on a non-tiny table (autovacuum_disabled_flagged).
 * Tables matching none of these are skipped.
 *
 * Relation names are rendered as PostgreSQL quoted identifiers, with any
 * embedded double quote doubled, so the SQL in the recommendations can be
 * copy-pasted even for unusual schema/table names.
 *
 * @param tables - Entries as returned by getDeadTuples()
 * @returns Parallel lists of conclusions and recommendations, in input order
 */
export function buildDeadTuplesConclusions(tables: DeadTuplesTable[]): {
  conclusions: string[];
  recommendations: string[];
} {
  const conclusions: string[] = [];
  const recommendations: string[] = [];

  const fmt = (n: number) => n.toLocaleString("en-US");
  // Quoted identifier: wrap in double quotes and double every embedded one.
  const quoteIdent = (ident: string) => `"${ident.replace(/"/g, '""')}"`;

  for (const t of tables) {
    const rel = `${quoteIdent(t.schema_name)}.${quoteIdent(t.table_name)}`;
    const lastAv = t.last_autovacuum
      ? `last autovacuum: ${t.last_autovacuum}`
      : "autovacuum has never vacuumed it";

    if (t.exceeds_dead_tuple_thresholds && t.autovacuum_disabled) {
      conclusions.push(
        `Table ${rel} has ${fmt(t.n_dead_tup)} dead tuples (${t.dead_pct}% of all tuples) ` +
        `and autovacuum is disabled on it via reloptions (${lastAv}).`
      );
      recommendations.push(
        `Re-enable autovacuum on ${rel}: alter table ${rel} reset (autovacuum_enabled); ` +
        `then run: vacuum (analyze) ${rel}; to clean up the accumulated dead tuples.`
      );
    } else if (t.exceeds_dead_tuple_thresholds) {
      conclusions.push(
        `Table ${rel} has ${fmt(t.n_dead_tup)} dead tuples (${t.dead_pct}% of all tuples; ${lastAv}).`
      );
      recommendations.push(
        `Run: vacuum (analyze) ${rel}; and review autovacuum settings ` +
        `(autovacuum_vacuum_scale_factor, autovacuum_vacuum_cost_delay, autovacuum_max_workers) ` +
        `if dead tuples keep accumulating on ${rel}.`
      );
    } else if (t.autovacuum_disabled_flagged) {
      conclusions.push(
        `Autovacuum is disabled via reloptions on table ${rel} ` +
        `(~${fmt(t.n_live_tup + t.n_dead_tup)} tuples); dead tuples and transaction ID age ` +
        `will accumulate unchecked.`
      );
      recommendations.push(
        `Re-enable autovacuum on ${rel}: alter table ${rel} reset (autovacuum_enabled); ` +
        `unless this table is managed by a carefully scheduled manual vacuum job.`
      );
    }
  }

  return { conclusions, recommendations };
}
|
|
1039
|
+
|
|
887
1040
|
/**
|
|
888
1041
|
* Create base report structure
|
|
889
1042
|
*/
|
|
@@ -1286,6 +1439,58 @@ async function generateF001(client: Client, nodeName: string): Promise<Report> {
|
|
|
1286
1439
|
return report;
|
|
1287
1440
|
}
|
|
1288
1441
|
|
|
1442
|
+
/**
 * Generate F003 report - Autovacuum: dead tuples
 *
 * Collects the per-table dead-tuple entries via getDeadTuples(), summarises
 * them (how many tables cross the dead-tuple thresholds, how many have
 * autovacuum disabled, how many of those are flagged, total dead tuples) and
 * attaches the conclusions/recommendations built by
 * buildDeadTuplesConclusions(). The thresholds in effect and the current
 * database size are included alongside.
 *
 * The result is stored under report.results[nodeName].data, keyed by the
 * current database name.
 */
async function generateF003(client: Client, nodeName: string): Promise<Report> {
  const report = createBaseReport("F003", "Autovacuum: dead tuples", nodeName);

  // Server version picks the matching SQL variant; fall back to 16 if unparseable.
  const postgresVersion = await getPostgresVersion(client);
  const pgMajorVersion = parseInt(postgresVersion.server_major_ver, 10) || 16;

  const tables = await getDeadTuples(client, pgMajorVersion);
  const dbInfo = await getCurrentDatabaseInfo(client, pgMajorVersion);

  // Single pass over the tables to gather every summary counter.
  let thresholdHits = 0;
  let disabledTotal = 0;
  let disabledFlaggedTotal = 0;
  let deadTupleSum = 0;
  for (const entry of tables) {
    if (entry.exceeds_dead_tuple_thresholds) {
      thresholdHits += 1;
    }
    if (entry.autovacuum_disabled) {
      disabledTotal += 1;
    }
    if (entry.autovacuum_disabled_flagged) {
      disabledFlaggedTotal += 1;
    }
    deadTupleSum += entry.n_dead_tup;
  }

  const advice = buildDeadTuplesConclusions(tables);

  const dbEntry = {
    dead_tuples_tables: tables,
    total_count: tables.length,
    flagged_count: thresholdHits,
    autovacuum_disabled_count: disabledTotal,
    autovacuum_disabled_flagged_count: disabledFlaggedTotal,
    total_dead_tuples: deadTupleSum,
    thresholds: {
      dead_tuples_min: F003_DEAD_TUPLES_MIN,
      dead_pct_min: F003_DEAD_PCT_MIN,
      autovacuum_disabled_min_rows: F003_AUTOVACUUM_DISABLED_MIN_ROWS,
    },
    conclusions: advice.conclusions,
    recommendations: advice.recommendations,
    database_size_bytes: dbInfo.size_bytes,
    database_size_pretty: formatBytes(dbInfo.size_bytes),
  };

  report.results[nodeName] = {
    data: { [dbInfo.datname]: dbEntry },
    postgres_version: postgresVersion,
  };

  return report;
}
|
|
1493
|
+
|
|
1289
1494
|
/**
|
|
1290
1495
|
* Generate F004 report - Autovacuum: heap bloat (estimated)
|
|
1291
1496
|
*
|
|
@@ -1900,6 +2105,7 @@ export const REPORT_GENERATORS: Record<string, (client: Client, nodeName: string
|
|
|
1900
2105
|
D001: generateD001,
|
|
1901
2106
|
D004: generateD004,
|
|
1902
2107
|
F001: generateF001,
|
|
2108
|
+
F003: generateF003,
|
|
1903
2109
|
F004: generateF004,
|
|
1904
2110
|
F005: generateF005,
|
|
1905
2111
|
G001: generateG001,
|
package/lib/metrics-loader.ts
CHANGED
|
@@ -17,25 +17,25 @@ import { METRICS, MetricDefinition } from "./metrics-embedded";
|
|
|
17
17
|
*/
|
|
18
18
|
export function getMetricSql(metricName: string, pgMajorVersion: number = 16): string {
  const definition = METRICS[metricName];
  if (!definition) {
    throw new Error(`Metric "${metricName}" not found. Available metrics: ${Object.keys(METRICS).join(", ")}`);
  }

  // Newest version first, so the first candidate that is not newer than the
  // server is the highest version <= pgMajorVersion.
  const versionsNewestFirst = Object.keys(definition.sqls).map((key) => parseInt(key, 10));
  versionsNewestFirst.sort((left, right) => right - left);

  for (const candidate of versionsNewestFirst) {
    if (candidate <= pgMajorVersion) {
      return definition.sqls[candidate];
    }
  }

  // Every available variant targets a newer server than the one requested.
  throw new Error(
    `No compatible SQL version for metric "${metricName}" with PostgreSQL ${pgMajorVersion}. ` +
    `Available versions: ${versionsNewestFirst.join(", ")}`
  );
}
|
|
41
41
|
|
|
@@ -65,6 +65,8 @@ export const METRIC_NAMES = {
|
|
|
65
65
|
H001: "pg_invalid_indexes",
|
|
66
66
|
H002: "unused_indexes",
|
|
67
67
|
H004: "redundant_indexes",
|
|
68
|
+
// Dead tuples and per-table autovacuum overrides
|
|
69
|
+
F003: "pg_dead_tuples",
|
|
68
70
|
// Bloat estimation
|
|
69
71
|
F004: "pg_table_bloat",
|
|
70
72
|
F005: "pg_btree_bloat",
|
|
@@ -86,18 +88,18 @@ export const METRIC_NAMES = {
|
|
|
86
88
|
*/
|
|
87
89
|
export function transformMetricRow(row: Record<string, unknown>): Record<string, unknown> {
  // Prometheus-specific columns that are dropped from the output.
  const dropped = ["epoch_ns", "num", "tag_datname"];
  const cleaned: Record<string, unknown> = {};

  Object.entries(row).forEach(([column, cell]) => {
    if (dropped.includes(column)) {
      return;
    }
    // Columns prefixed with "tag_" are exposed under the bare name.
    const target = column.startsWith("tag_") ? column.slice(4) : column;
    cleaned[target] = cell;
  });

  return cleaned;
}
|
|
103
105
|
|
package/lib/util.ts
CHANGED
|
@@ -22,19 +22,27 @@ function isHtmlContent(text: string): boolean {
|
|
|
22
22
|
return trimmed.startsWith("<!DOCTYPE") || trimmed.startsWith("<html") || trimmed.startsWith("<HTML");
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
/**
 * Remediation hint appended to 401 errors so both humans and AI agents
 * (MCP tool callers) know how to recover from an invalid/stale API key.
 *
 * formatHttpError() adds it on its own line (prefixed with "\n") and only
 * when the HTTP status is exactly 401.
 */
const AUTH_REMEDIATION_HINT = "Run 'postgresai auth' to (re)authenticate, or set/update PGAI_API_KEY.";
|
|
30
|
+
|
|
25
31
|
/**
|
|
26
32
|
* Format an HTTP error response into a clean, developer-friendly message.
|
|
27
33
|
* Handles HTML error pages (e.g., from Cloudflare) by showing just the status code and message.
|
|
34
|
+
* For 401 responses, appends a remediation hint pointing at `postgresai auth`.
|
|
28
35
|
*/
|
|
29
36
|
export function formatHttpError(operation: string, status: number, responseBody?: string): string {
|
|
30
37
|
const statusMessage = HTTP_STATUS_MESSAGES[status] || "Request failed";
|
|
31
38
|
let errMsg = `${operation}: HTTP ${status} - ${statusMessage}`;
|
|
39
|
+
const remediation = status === 401 ? `\n${AUTH_REMEDIATION_HINT}` : "";
|
|
32
40
|
|
|
33
41
|
if (responseBody) {
|
|
34
42
|
// If it's HTML (like Cloudflare error pages), don't dump the raw HTML
|
|
35
43
|
if (isHtmlContent(responseBody)) {
|
|
36
44
|
// Just use the status message, don't append HTML
|
|
37
|
-
return errMsg;
|
|
45
|
+
return errMsg + remediation;
|
|
38
46
|
}
|
|
39
47
|
|
|
40
48
|
// Try to parse as JSON for structured error info
|
|
@@ -56,7 +64,7 @@ export function formatHttpError(operation: string, status: number, responseBody?
|
|
|
56
64
|
}
|
|
57
65
|
}
|
|
58
66
|
|
|
59
|
-
return errMsg;
|
|
67
|
+
return errMsg + remediation;
|
|
60
68
|
}
|
|
61
69
|
|
|
62
70
|
export function maskSecret(secret: string): string {
|
|
@@ -124,4 +132,3 @@ export function resolveBaseUrls(
|
|
|
124
132
|
storageBaseUrl: normalizeBaseUrl(storageCandidate),
|
|
125
133
|
};
|
|
126
134
|
}
|
|
127
|
-
|
package/package.json
CHANGED
package/scripts/embed-metrics.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
/**
|
|
3
3
|
* Build script to embed metrics.yml into the CLI bundle.
|
|
4
|
-
*
|
|
4
|
+
*
|
|
5
5
|
* This script reads config/pgwatch-prometheus/metrics.yml and generates
|
|
6
6
|
* cli/lib/metrics-embedded.ts with the metrics data embedded as TypeScript.
|
|
7
|
-
*
|
|
7
|
+
*
|
|
8
8
|
* The generated file is NOT committed to git - it's regenerated at build time.
|
|
9
|
-
*
|
|
9
|
+
*
|
|
10
10
|
* Usage: bun run scripts/embed-metrics.ts
|
|
11
11
|
*/
|
|
12
12
|
|
|
@@ -46,6 +46,8 @@ const REQUIRED_METRICS = [
|
|
|
46
46
|
"redundant_indexes",
|
|
47
47
|
// Stats reset info (H002)
|
|
48
48
|
"stats_reset",
|
|
49
|
+
// Dead tuples and per-table autovacuum overrides (F003)
|
|
50
|
+
"pg_dead_tuples",
|
|
49
51
|
// Bloat estimation (F004, F005)
|
|
50
52
|
"pg_table_bloat",
|
|
51
53
|
"pg_btree_bloat",
|
|
@@ -55,7 +57,7 @@ const REQUIRED_METRICS = [
|
|
|
55
57
|
|
|
56
58
|
function main() {
|
|
57
59
|
console.log(`Reading metrics from: ${METRICS_YML_PATH}`);
|
|
58
|
-
|
|
60
|
+
|
|
59
61
|
if (!fs.existsSync(METRICS_YML_PATH)) {
|
|
60
62
|
console.error(`ERROR: metrics.yml not found at ${METRICS_YML_PATH}`);
|
|
61
63
|
process.exit(1);
|
|
@@ -120,7 +122,7 @@ function generateTypeScript(metrics: Record<string, MetricDefinition>): string {
|
|
|
120
122
|
|
|
121
123
|
for (const [name, metric] of Object.entries(metrics)) {
|
|
122
124
|
lines.push(` ${JSON.stringify(name)}: {`);
|
|
123
|
-
|
|
125
|
+
|
|
124
126
|
if (metric.description) {
|
|
125
127
|
// Escape description for TypeScript string
|
|
126
128
|
const desc = metric.description.trim().replace(/\n/g, " ").replace(/\s+/g, " ");
|
|
@@ -156,4 +158,3 @@ function generateTypeScript(metrics: Record<string, MetricDefinition>): string {
|
|
|
156
158
|
}
|
|
157
159
|
|
|
158
160
|
main();
|
|
159
|
-
|