postgresai 0.15.0-rc.3 → 0.15.0-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/postgres-ai.ts +127 -20
- package/dist/bin/postgres-ai.js +368 -109
- package/lib/checkup-dictionary.ts +0 -11
- package/lib/checkup.ts +14 -14
- package/lib/init.ts +1 -1
- package/lib/metrics-loader.ts +3 -3
- package/package.json +1 -1
- package/test/upgrade.test.ts +123 -0
package/bin/postgres-ai.ts
CHANGED
|
@@ -74,6 +74,71 @@ function stripMatchingQuotes(value: string): string {
|
|
|
74
74
|
return trimmed;
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
+
/**
 * Contract for the env vars the monitoring stack cannot start without.
 *
 * Every key described by an `EnvKeyDefault` must be present in `.env` for the
 * docker-compose stack to come up cleanly. Each entry carries its own recipe
 * for producing a safe default when the key is absent; values that already
 * exist are never touched, so applying the contract is purely additive.
 *
 * This underpins in-place upgrades: when a user moves from a release that did
 * not need a key (e.g. 0.14, pre-VM-auth) to one that does (0.15),
 * `ensureRequiredEnvVars` appends whatever is missing so the next
 * `docker compose up` does not fail with `missing "<KEY>" env var`.
 */
type EnvKeyDefault = {
  /** Name of the variable as it appears on the left-hand side in `.env`. */
  key: string;

  /**
   * Factory producing the value to write when the key is missing
   * (green-field installs and the first upgrade that needs the key).
   */
  defaultValue: () => string;

  /** CLI version that introduced the key; used in human-readable migration logs. */
  introducedIn: string;
};
|
|
97
|
+
|
|
98
|
+
// Keys the monitoring stack requires in `.env`, in the order they are appended
// when missing. Secrets are generated per call; the username is a fixed literal.
const REQUIRED_ENV_KEYS: EnvKeyDefault[] = [
  {
    key: "REPLICATOR_PASSWORD",
    // 32 random bytes rendered as 64 hex characters.
    defaultValue: () => {
      const secret = crypto.randomBytes(32);
      return secret.toString("hex");
    },
    introducedIn: "0.13",
  },
  {
    key: "VM_AUTH_USERNAME",
    defaultValue: () => {
      return "vmauth";
    },
    introducedIn: "0.15",
  },
  {
    key: "VM_AUTH_PASSWORD",
    // 18 random bytes rendered as 24 base64 characters (no `=` padding at this length).
    defaultValue: () => {
      const secret = crypto.randomBytes(18);
      return secret.toString("base64");
    },
    introducedIn: "0.15",
  },
];
|
|
103
|
+
|
|
104
|
+
/**
 * Report whether `envContent` already defines `key`.
 *
 * Besides the plain `KEY=value` form, this also recognises lines with leading
 * whitespace, an `export ` prefix, or blanks before `=`, which dotenv-style
 * parsers commonly accept. Treating those as "missing" would append a second
 * definition that can override the value the user chose.
 */
function envFileDefinesKey(envContent: string, key: string): boolean {
  // Keys are plain identifiers today; escape anyway so a future key containing
  // a regex metacharacter cannot change the meaning of the pattern.
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const definition = new RegExp(`^[ \\t]*(?:export[ \\t]+)?${escapedKey}[ \\t]*=`, "m");
  return definition.test(envContent);
}

/**
 * Replace `targetFile` with `content`, leaving the file with 0600 permissions.
 *
 * The content is first written to a sibling temp file and then renamed over
 * the target, so an interrupted run never leaves a half-written `.env`.
 * If the temp-file route is not possible (for example the directory is not
 * writable, or the rename is refused), it falls back to writing in place and
 * tightening the permissions on a best-effort basis.
 */
function writeEnvFileAtomically(targetFile: string, content: string): void {
  const tmpFile = path.join(
    path.dirname(targetFile),
    `.env.tmp-${process.pid}-${Date.now()}`,
  );

  try {
    // "wx" refuses to reuse an existing path, so we never clobber a stray file.
    fs.writeFileSync(tmpFile, content, { encoding: "utf8", mode: 0o600, flag: "wx" });
    // The creation mode is filtered through the umask; pin the final mode explicitly.
    fs.chmodSync(tmpFile, 0o600);
    fs.renameSync(tmpFile, targetFile);
    return;
  } catch {
    try {
      fs.unlinkSync(tmpFile);
    } catch {
      // Temp file was never created or is already gone - nothing to clean up.
    }
  }

  // Fallback: in-place write. `mode` only applies when the file is created,
  // so chmod afterwards to cover a pre-existing file with looser permissions.
  fs.writeFileSync(targetFile, content, { encoding: "utf8", mode: 0o600 });
  try {
    fs.chmodSync(targetFile, 0o600);
  } catch {
    // Best effort: the content is written; permissions could not be tightened.
  }
}

/**
 * Make sure every key in `REQUIRED_ENV_KEYS` is defined in `<projectDir>/.env`.
 *
 * Reads `.env` (a missing file counts as empty), appends a `KEY=<default>`
 * line for each required key that is not defined yet, and writes the result
 * back atomically with 0600 permissions. Existing content - values, ordering
 * and comments - is kept verbatim; new lines only ever go at the end.
 *
 * Idempotent: once all keys are present a further call changes nothing and
 * does not touch the file.
 *
 * Used by `mon local-install`, `mon update`, and `mon update-config` so the
 * in-place upgrade path picks up newly-required env vars without surprising
 * the user with a silent boot failure on `sink-prometheus` / `grafana`.
 *
 * @param projectDir Directory that contains (or will contain) the `.env` file.
 * @returns The keys that were appended, in `REQUIRED_ENV_KEYS` order; an empty
 *          array when nothing had to be added.
 * @throws Filesystem errors from reading or writing `.env` are propagated.
 */
function ensureRequiredEnvVars(projectDir: string): string[] {
  const envFile = path.resolve(projectDir, ".env");
  const envExists = fs.existsSync(envFile);
  const existing = envExists ? fs.readFileSync(envFile, "utf8") : "";

  const missing = REQUIRED_ENV_KEYS.filter((spec) => !envFileDefinesKey(existing, spec.key));
  if (missing.length === 0) {
    return [];
  }

  // Append (don't overwrite) so we preserve order and any comments the user
  // may have added to their .env. Make sure we have a trailing newline first.
  const separator = existing.length > 0 && !existing.endsWith("\n") ? "\n" : "";
  const appendLines = missing.map((spec) => `${spec.key}=${spec.defaultValue()}`);
  const newContent = `${existing}${separator}${appendLines.join("\n")}\n`;

  // If `.env` is a symlink, write through to its target so the rename does not
  // replace the link itself with a regular file.
  const targetFile = envExists ? fs.realpathSync(envFile) : envFile;
  writeEnvFileAtomically(targetFile, newContent);

  return missing.map((spec) => spec.key);
}
|
|
141
|
+
|
|
77
142
|
// Helper functions for spawning processes - use Node.js child_process for compatibility
|
|
78
143
|
async function execFilePromise(file: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
|
|
79
144
|
return new Promise((resolve, reject) => {
|
|
@@ -2970,41 +3035,83 @@ mon
|
|
|
2970
3035
|
});
|
|
2971
3036
|
// `mon update-config`: bring `.env` up to date with the keys this stack
// version requires, then regenerate sources via the `sources-generator` service.
mon
  .command("update-config")
  .description("apply monitoring services configuration (generate sources, migrate .env)")
  .action(async () => {
    // Resolve the project directory; on failure report the reason and exit non-zero.
    let projectDir: string;
    try {
      const paths = await resolveOrInitPaths();
      projectDir = paths.projectDir;
    } catch (err) {
      console.error(err instanceof Error ? err.message : String(err));
      process.exitCode = 1;
      return;
    }

    // .env migration runs before compose so that keys introduced by newer
    // stack versions (e.g. VM_AUTH_* added in 0.15) exist by the time
    // sink-prometheus boots; otherwise an in-place upgrade from an older
    // deployment breaks with `missing "VM_AUTH_USERNAME" env var`.
    const newKeys = ensureRequiredEnvVars(projectDir);
    if (newKeys.length !== 0) {
      console.log(`Added missing .env keys for this stack version: ${newKeys.join(", ")}`);
      console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
    }

    // Propagate a failing compose exit status as the CLI's exit code.
    const exitStatus = await runCompose(["run", "--rm", "sources-generator"]);
    if (exitStatus !== 0) {
      process.exitCode = exitStatus;
    }
  });
|
|
2978
3063
|
mon
|
|
2979
3064
|
.command("update")
|
|
2980
|
-
.description("update monitoring stack")
|
|
3065
|
+
.description("update monitoring stack (migrate .env, pull images)")
|
|
2981
3066
|
.action(async () => {
|
|
2982
3067
|
console.log("Updating PostgresAI monitoring stack...\n");
|
|
2983
3068
|
|
|
2984
3069
|
try {
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
3070
|
+
let projectDir: string;
|
|
3071
|
+
try {
|
|
3072
|
+
({ projectDir } = await resolveOrInitPaths());
|
|
3073
|
+
} catch (error) {
|
|
3074
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
3075
|
+
console.error(message);
|
|
2989
3076
|
process.exitCode = 1;
|
|
2990
3077
|
return;
|
|
2991
3078
|
}
|
|
2992
3079
|
|
|
2993
|
-
//
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
const
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
console.log(
|
|
3080
|
+
// Step 1: migrate .env so newer stack versions that require additional
|
|
3081
|
+
// env vars (e.g. VM_AUTH_USERNAME / VM_AUTH_PASSWORD introduced in 0.15)
|
|
3082
|
+
// don't make `docker compose up` fail silently for users who installed
|
|
3083
|
+
// before those vars existed. Purely additive: existing values are kept.
|
|
3084
|
+
console.log("Checking .env for newly-required keys...");
|
|
3085
|
+
const added = ensureRequiredEnvVars(projectDir);
|
|
3086
|
+
if (added.length > 0) {
|
|
3087
|
+
console.log(`✓ Added missing .env keys: ${added.join(", ")}`);
|
|
3088
|
+
console.log(" (existing values preserved; missing keys filled with safe defaults)");
|
|
3089
|
+
} else {
|
|
3090
|
+
console.log("✓ .env is up to date");
|
|
3091
|
+
}
|
|
3092
|
+
console.log();
|
|
3093
|
+
|
|
3094
|
+
// Step 2: refresh repo if this is a git-based deployment. Some users
|
|
3095
|
+
// upgrade purely via `npm install -g postgresai@latest` and don't have a
|
|
3096
|
+
// git checkout - in that case we skip git operations and still do the
|
|
3097
|
+
// env migration + docker pull.
|
|
3098
|
+
const gitDir = path.resolve(projectDir, ".git");
|
|
3099
|
+
if (fs.existsSync(gitDir)) {
|
|
3100
|
+
console.log("Fetching latest changes...");
|
|
3101
|
+
await execFilePromise("git", ["fetch", "origin"]);
|
|
3102
|
+
|
|
3103
|
+
const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
|
|
3104
|
+
const currentBranch = branch.trim();
|
|
3105
|
+
console.log(`Current branch: ${currentBranch}`);
|
|
3106
|
+
|
|
3107
|
+
console.log("Pulling latest changes...");
|
|
3108
|
+
const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
|
|
3109
|
+
console.log(pullOut);
|
|
3110
|
+
} else {
|
|
3111
|
+
console.log("(not a git checkout — skipping git fetch/pull and going straight to image pull)");
|
|
3112
|
+
}
|
|
3006
3113
|
|
|
3007
|
-
//
|
|
3114
|
+
// Step 3: pull new images.
|
|
3008
3115
|
console.log("\nUpdating Docker images...");
|
|
3009
3116
|
const code = await runCompose(["pull"]);
|
|
3010
3117
|
|
package/dist/bin/postgres-ai.js
CHANGED
|
@@ -13423,7 +13423,7 @@ var {
|
|
|
13423
13423
|
// package.json
|
|
13424
13424
|
var package_default = {
|
|
13425
13425
|
name: "postgresai",
|
|
13426
|
-
version: "0.15.0-rc.
|
|
13426
|
+
version: "0.15.0-rc.5",
|
|
13427
13427
|
description: "postgres_ai CLI",
|
|
13428
13428
|
license: "Apache-2.0",
|
|
13429
13429
|
private: false,
|
|
@@ -16254,7 +16254,7 @@ var Result = import_lib.default.Result;
|
|
|
16254
16254
|
var TypeOverrides = import_lib.default.TypeOverrides;
|
|
16255
16255
|
var defaults = import_lib.default.defaults;
|
|
16256
16256
|
// package.json
|
|
16257
|
-
var version = "0.15.0-rc.
|
|
16257
|
+
var version = "0.15.0-rc.5";
|
|
16258
16258
|
var package_default2 = {
|
|
16259
16259
|
name: "postgresai",
|
|
16260
16260
|
version,
|
|
@@ -27687,7 +27687,7 @@ where
|
|
|
27687
27687
|
statement_timeout_seconds: 300
|
|
27688
27688
|
},
|
|
27689
27689
|
pg_invalid_indexes: {
|
|
27690
|
-
description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact).",
|
|
27690
|
+
description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact). Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks invalid indexes by `index_size_bytes desc` (ties broken by schema, table, then index name for stability), keeps the top 100, and folds the tail into a single `'$other$'` row whose `index_size_bytes` / `table_row_estimate` are summed and whose tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when all invalid indexes fit within the top-100 cap, so its absence on healthy clusters is normal.",
|
|
27691
27691
|
sqls: {
|
|
27692
27692
|
11: `with fk_indexes as ( /* pgwatch_generated */
|
|
27693
27693
|
select
|
|
@@ -27755,25 +27755,65 @@ data as (
|
|
|
27755
27755
|
left join valid_duplicates vd on vd.invalid_indexrelid = pidx.indexrelid
|
|
27756
27756
|
where pidx.indisvalid = false
|
|
27757
27757
|
),
|
|
27758
|
-
|
|
27758
|
+
ranked as (
|
|
27759
27759
|
select
|
|
27760
|
-
row_number() over (
|
|
27760
|
+
row_number() over (
|
|
27761
|
+
order by index_size_bytes desc nulls last,
|
|
27762
|
+
tag_schema_name, tag_table_name, tag_index_name
|
|
27763
|
+
) as num,
|
|
27761
27764
|
data.*
|
|
27762
27765
|
from data
|
|
27763
27766
|
)
|
|
27764
27767
|
select
|
|
27765
27768
|
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
|
|
27766
27769
|
current_database() as tag_datname,
|
|
27767
|
-
|
|
27768
|
-
|
|
27769
|
-
|
|
27770
|
+
num,
|
|
27771
|
+
tag_index_name,
|
|
27772
|
+
tag_schema_name,
|
|
27773
|
+
tag_table_name,
|
|
27774
|
+
tag_relation_name,
|
|
27775
|
+
index_definition,
|
|
27776
|
+
index_size_bytes,
|
|
27777
|
+
is_pk,
|
|
27778
|
+
is_unique,
|
|
27779
|
+
constraint_name,
|
|
27780
|
+
table_row_estimate,
|
|
27781
|
+
has_valid_duplicate,
|
|
27782
|
+
valid_index_name,
|
|
27783
|
+
valid_index_definition,
|
|
27784
|
+
supports_fk
|
|
27785
|
+
from ranked
|
|
27786
|
+
where num <= 100
|
|
27787
|
+
union all
|
|
27788
|
+
select
|
|
27789
|
+
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
|
|
27790
|
+
current_database() as tag_datname,
|
|
27791
|
+
0::bigint as num,
|
|
27792
|
+
'$other$'::text as tag_index_name,
|
|
27793
|
+
'$other$'::text as tag_schema_name,
|
|
27794
|
+
'$other$'::text as tag_table_name,
|
|
27795
|
+
'$other$'::text as tag_relation_name,
|
|
27796
|
+
'$other$'::text as index_definition,
|
|
27797
|
+
coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
|
|
27798
|
+
false as is_pk,
|
|
27799
|
+
false as is_unique,
|
|
27800
|
+
'$other$'::text as constraint_name,
|
|
27801
|
+
coalesce(sum(table_row_estimate), 0)::bigint as table_row_estimate,
|
|
27802
|
+
bool_or(has_valid_duplicate) as has_valid_duplicate,
|
|
27803
|
+
'$other$'::text as valid_index_name,
|
|
27804
|
+
'$other$'::text as valid_index_definition,
|
|
27805
|
+
coalesce(max(supports_fk), 0)::int as supports_fk
|
|
27806
|
+
from ranked
|
|
27807
|
+
where num > 100
|
|
27808
|
+
group by ()
|
|
27809
|
+
having count(*) > 0;
|
|
27770
27810
|
`
|
|
27771
27811
|
},
|
|
27772
27812
|
gauges: ["*"],
|
|
27773
27813
|
statement_timeout_seconds: 15
|
|
27774
27814
|
},
|
|
27775
27815
|
unused_indexes: {
|
|
27776
|
-
description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance.",
|
|
27816
|
+
description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: within the `idx_scan = 0 AND idx_is_btree` filter, ranks indexes by `index_size_bytes desc` (ties broken by schema, table, index name), keeps the top 100, and folds the tail into a single `'$other$'` row. Counter columns (`idx_scan`, `all_scans`, `writes`, `index_size_bytes`, `table_size_bytes`, `relpages`) are summed across the tail; ratio columns (`index_scan_pct`, `scans_per_write`) and the `supports_fk` boolean are deliberately zeroed/false on the aggregate row because the tail-level average would mislead and the per-row FK relationship has no meaningful aggregate. Tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when ≤100 indexes match the unused filter.",
|
|
27777
27817
|
sqls: {
|
|
27778
27818
|
11: `with fk_indexes as ( /* pgwatch_generated */
|
|
27779
27819
|
select
|
|
@@ -27852,6 +27892,17 @@ limit 1000;
|
|
|
27852
27892
|
from indexes i
|
|
27853
27893
|
join table_scans ts on ts.relid = i.indrelid
|
|
27854
27894
|
)
|
|
27895
|
+
, ranked as (
|
|
27896
|
+
select
|
|
27897
|
+
row_number() over (
|
|
27898
|
+
order by index_size_bytes desc nulls last,
|
|
27899
|
+
schema_name, table_name, index_name
|
|
27900
|
+
) as num,
|
|
27901
|
+
*
|
|
27902
|
+
from index_ratios
|
|
27903
|
+
where idx_scan = 0
|
|
27904
|
+
and idx_is_btree
|
|
27905
|
+
)
|
|
27855
27906
|
select
|
|
27856
27907
|
'Never Used Indexes' as tag_reason,
|
|
27857
27908
|
current_database() as tag_datname,
|
|
@@ -27871,19 +27922,39 @@ select
|
|
|
27871
27922
|
idx_is_btree,
|
|
27872
27923
|
opclasses as tag_opclasses,
|
|
27873
27924
|
supports_fk
|
|
27874
|
-
from
|
|
27875
|
-
where
|
|
27876
|
-
|
|
27877
|
-
|
|
27878
|
-
|
|
27879
|
-
|
|
27925
|
+
from ranked
|
|
27926
|
+
where num <= 100
|
|
27927
|
+
union all
|
|
27928
|
+
select
|
|
27929
|
+
'Never Used Indexes' as tag_reason,
|
|
27930
|
+
current_database() as tag_datname,
|
|
27931
|
+
0::oid as index_id,
|
|
27932
|
+
'$other$'::text as tag_schema_name,
|
|
27933
|
+
'$other$'::text as tag_table_name,
|
|
27934
|
+
'$other$'::text as tag_index_name,
|
|
27935
|
+
'$other$'::text as index_definition,
|
|
27936
|
+
coalesce(sum(idx_scan), 0)::int8 as idx_scan,
|
|
27937
|
+
coalesce(sum(all_scans), 0)::int8 as all_scans,
|
|
27938
|
+
0::numeric as index_scan_pct,
|
|
27939
|
+
coalesce(sum(writes), 0)::int8 as writes,
|
|
27940
|
+
0::numeric as scans_per_write,
|
|
27941
|
+
coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
|
|
27942
|
+
coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
|
|
27943
|
+
coalesce(sum(relpages), 0)::int4 as relpages,
|
|
27944
|
+
true as idx_is_btree,
|
|
27945
|
+
'$other$'::text as tag_opclasses,
|
|
27946
|
+
false as supports_fk
|
|
27947
|
+
from ranked
|
|
27948
|
+
where num > 100
|
|
27949
|
+
group by ()
|
|
27950
|
+
having count(*) > 0;
|
|
27880
27951
|
`
|
|
27881
27952
|
},
|
|
27882
27953
|
gauges: ["*"],
|
|
27883
27954
|
statement_timeout_seconds: 15
|
|
27884
27955
|
},
|
|
27885
27956
|
redundant_indexes: {
|
|
27886
|
-
description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds.",
|
|
27957
|
+
description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks redundant indexes by `index_size_bytes desc` (ties broken by `table_name`), keeps the top 100, and folds the tail into a single `'$other$'` row whose `table_size_bytes`, `index_size_bytes` and `index_usage` columns are summed and whose tag columns carry the literal `'$other$'` sentinel. The `redundant_indexes_grouped` CTE intentionally preserves duplicate column aliases (`tag_schema_name` / `tag_index_name` appear twice — once from the raw name and once from the `formated_*` variant) because the dashboards rely on both spellings; the duplication is preserved on the `'$other$'` row for consistency. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when there are ≤100 redundant pairs, so its absence on healthy clusters is normal.",
|
|
27887
27958
|
sqls: {
|
|
27888
27959
|
11: `with fk_indexes as ( /* pgwatch_generated */
|
|
27889
27960
|
select
|
|
@@ -28035,9 +28106,43 @@ redundant_indexes_tmp_num as (
|
|
|
28035
28106
|
formated_relation_name,
|
|
28036
28107
|
supports_fk
|
|
28037
28108
|
order by index_size_bytes desc
|
|
28109
|
+
),
|
|
28110
|
+
-- redundant_indexes_grouped intentionally exposes duplicate aliases
|
|
28111
|
+
-- (tag_schema_name / tag_index_name appear twice — once from the
|
|
28112
|
+
-- raw name and once from the formated_* variant). select * over it
|
|
28113
|
+
-- preserves both. Order by table_name (unique, non-duplicated).
|
|
28114
|
+
ranked as (
|
|
28115
|
+
select
|
|
28116
|
+
row_number() over (
|
|
28117
|
+
order by index_size_bytes desc nulls last, table_name
|
|
28118
|
+
) as num,
|
|
28119
|
+
redundant_indexes_grouped.*
|
|
28120
|
+
from redundant_indexes_grouped
|
|
28038
28121
|
)
|
|
28039
|
-
select * from
|
|
28040
|
-
|
|
28122
|
+
select * from ranked where num <= 100
|
|
28123
|
+
union all
|
|
28124
|
+
select
|
|
28125
|
+
0::bigint as num,
|
|
28126
|
+
0::oid as index_id,
|
|
28127
|
+
'$other$'::text as tag_schema_name,
|
|
28128
|
+
'$other$'::text as table_name,
|
|
28129
|
+
coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
|
|
28130
|
+
'$other$'::text as tag_index_name,
|
|
28131
|
+
'$other$'::text as tag_access_method,
|
|
28132
|
+
'$other$'::text as tag_reason,
|
|
28133
|
+
coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
|
|
28134
|
+
coalesce(sum(index_usage), 0)::int8 as index_usage,
|
|
28135
|
+
'$other$'::text as index_definition,
|
|
28136
|
+
'$other$'::text as tag_index_name,
|
|
28137
|
+
'$other$'::text as tag_schema_name,
|
|
28138
|
+
'$other$'::text as tag_table_name,
|
|
28139
|
+
'$other$'::text as tag_relation_name,
|
|
28140
|
+
coalesce(max(supports_fk), 0)::int as supports_fk,
|
|
28141
|
+
'$other$'::text as redundant_to_json
|
|
28142
|
+
from ranked
|
|
28143
|
+
where num > 100
|
|
28144
|
+
group by ()
|
|
28145
|
+
having count(*) > 0;
|
|
28041
28146
|
`
|
|
28042
28147
|
},
|
|
28043
28148
|
gauges: ["*"],
|
|
@@ -28059,93 +28164,139 @@ where datname = current_database()
|
|
|
28059
28164
|
statement_timeout_seconds: 15
|
|
28060
28165
|
},
|
|
28061
28166
|
pg_table_bloat: {
|
|
28062
|
-
description: "
|
|
28167
|
+
description: "Estimated per-table bloat (heap pages allocated vs heap pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262: everything below the cap is summed into a single `'$other$'` row so dashboard \"total bloat across the DB\" stays correct even when the tail is large. Ranks by `bloat_pct` descending (most-bloated tables first), with `is_na = 0` preferred (don't crowd top-N with tables whose estimate is unreliable) and stable schemaname/tblname tiebreakers. Preserves the existing >1 MiB filter (zero-byte and tiny tables aren't interesting for bloat). Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size (total wasted bytes in the tail); recompute extra_pct and bloat_pct from the summed numerator/denominator (weighted-avg effectively); avg(fillfactor); max(is_na) (any tail row with bad stats taints the aggregate). The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
|
|
28063
28168
|
sqls: {
|
|
28064
|
-
11: `
|
|
28065
|
-
|
|
28066
|
-
|
|
28067
|
-
|
|
28068
|
-
|
|
28069
|
-
|
|
28070
|
-
|
|
28071
|
-
|
|
28072
|
-
|
|
28073
|
-
|
|
28074
|
-
|
|
28075
|
-
|
|
28076
|
-
|
|
28077
|
-
|
|
28078
|
-
|
|
28079
|
-
|
|
28080
|
-
|
|
28081
|
-
|
|
28082
|
-
|
|
28083
|
-
|
|
28169
|
+
11: `with bloat_data as ( /* pgwatch_generated */
|
|
28170
|
+
select schemaname, tblname,
|
|
28171
|
+
(bs*tblpages)/(1024*1024)::float as real_size_mib,
|
|
28172
|
+
(tblpages-est_tblpages)*bs as extra_size,
|
|
28173
|
+
case when tblpages > 0 and tblpages - est_tblpages > 0
|
|
28174
|
+
then 100 * (tblpages - est_tblpages)/tblpages::float
|
|
28175
|
+
else 0
|
|
28176
|
+
end as extra_pct,
|
|
28177
|
+
fillfactor,
|
|
28178
|
+
case when tblpages - est_tblpages_ff > 0
|
|
28179
|
+
then (tblpages-est_tblpages_ff)*bs
|
|
28180
|
+
else 0
|
|
28181
|
+
end as bloat_size,
|
|
28182
|
+
case when tblpages > 0 and tblpages - est_tblpages_ff > 0
|
|
28183
|
+
then 100 * (tblpages - est_tblpages_ff)/tblpages::float
|
|
28184
|
+
else 0
|
|
28185
|
+
end as bloat_pct,
|
|
28186
|
+
is_na,
|
|
28187
|
+
-- carried for the $other$ aggregate denominators
|
|
28188
|
+
bs, tblpages, est_tblpages, est_tblpages_ff
|
|
28084
28189
|
from (
|
|
28085
|
-
select
|
|
28086
|
-
(
|
|
28087
|
-
|
|
28088
|
-
- case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
|
|
28089
|
-
) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
|
|
28090
|
-
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
|
|
28091
|
-
-- , tpl_hdr_size, tpl_data_size
|
|
28190
|
+
select ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages,
|
|
28191
|
+
ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff,
|
|
28192
|
+
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
|
|
28092
28193
|
from (
|
|
28093
28194
|
select
|
|
28094
|
-
|
|
28095
|
-
|
|
28096
|
-
|
|
28097
|
-
|
|
28098
|
-
|
|
28099
|
-
|
|
28100
|
-
|
|
28101
|
-
|
|
28102
|
-
|
|
28103
|
-
|
|
28104
|
-
|
|
28105
|
-
|
|
28106
|
-
|
|
28107
|
-
|
|
28108
|
-
|
|
28109
|
-
|
|
28110
|
-
|
|
28111
|
-
|
|
28112
|
-
|
|
28113
|
-
|
|
28114
|
-
|
|
28115
|
-
|
|
28116
|
-
|
|
28117
|
-
|
|
28118
|
-
|
|
28119
|
-
|
|
28120
|
-
|
|
28121
|
-
|
|
28122
|
-
|
|
28123
|
-
|
|
28124
|
-
order by
|
|
28125
|
-
|
|
28195
|
+
( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
|
|
28196
|
+
- case when tpl_hdr_size%ma = 0 then ma else tpl_hdr_size%ma end
|
|
28197
|
+
- case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
|
|
28198
|
+
) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
|
|
28199
|
+
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
|
|
28200
|
+
from (
|
|
28201
|
+
select
|
|
28202
|
+
tbl.oid as tblid, ns.nspname as schemaname, tbl.relname as tblname, tbl.reltuples,
|
|
28203
|
+
tbl.relpages as heappages, coalesce(toast.relpages, 0) as toastpages,
|
|
28204
|
+
coalesce(toast.reltuples, 0) as toasttuples,
|
|
28205
|
+
coalesce(substring(
|
|
28206
|
+
array_to_string(tbl.reloptions, ' ')
|
|
28207
|
+
from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor,
|
|
28208
|
+
current_setting('block_size')::numeric as bs,
|
|
28209
|
+
case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma,
|
|
28210
|
+
24 as page_hdr,
|
|
28211
|
+
23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end
|
|
28212
|
+
+ case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size,
|
|
28213
|
+
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size,
|
|
28214
|
+
(bool_or(att.atttypid = 'pg_catalog.name'::regtype)
|
|
28215
|
+
or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na
|
|
28216
|
+
from pg_attribute as att
|
|
28217
|
+
join pg_class as tbl on att.attrelid = tbl.oid
|
|
28218
|
+
join pg_namespace as ns on ns.oid = tbl.relnamespace
|
|
28219
|
+
left join postgres_ai.pg_statistic as s on s.schemaname=ns.nspname
|
|
28220
|
+
and s.tablename = tbl.relname and s.inherited=false and s.attname=att.attname
|
|
28221
|
+
left join pg_class as toast on tbl.reltoastrelid = toast.oid
|
|
28222
|
+
where not att.attisdropped
|
|
28223
|
+
and tbl.relkind in ('r','m')
|
|
28224
|
+
group by 1,2,3,4,5,6,7,8,9,10
|
|
28225
|
+
order by 2,3
|
|
28226
|
+
) as s
|
|
28227
|
+
) as s2
|
|
28228
|
+
) as s3
|
|
28229
|
+
where (bs * tblpages::float / (1024 * 1024)) > 1 /* exclude tables below 1 MiB */
|
|
28230
|
+
),
|
|
28231
|
+
ranked as (
|
|
28232
|
+
select
|
|
28233
|
+
row_number() over (
|
|
28234
|
+
order by is_na = 0 desc, bloat_pct desc nulls last,
|
|
28235
|
+
schemaname, tblname
|
|
28236
|
+
) as rownum,
|
|
28237
|
+
*
|
|
28238
|
+
from bloat_data
|
|
28239
|
+
)
|
|
28240
|
+
select
|
|
28241
|
+
current_database() as tag_datname,
|
|
28242
|
+
schemaname as tag_schemaname,
|
|
28243
|
+
tblname as tag_tblname,
|
|
28244
|
+
real_size_mib,
|
|
28245
|
+
extra_size,
|
|
28246
|
+
extra_pct,
|
|
28247
|
+
fillfactor,
|
|
28248
|
+
bloat_size,
|
|
28249
|
+
bloat_pct,
|
|
28250
|
+
is_na
|
|
28251
|
+
from ranked
|
|
28252
|
+
where rownum <= 100
|
|
28253
|
+
union all
|
|
28254
|
+
select
|
|
28255
|
+
current_database() as tag_datname,
|
|
28256
|
+
'$other$'::text as tag_schemaname,
|
|
28257
|
+
'$other$'::text as tag_tblname,
|
|
28258
|
+
coalesce(sum(real_size_mib), 0)::float as real_size_mib,
|
|
28259
|
+
coalesce(sum(extra_size), 0)::int8 as extra_size,
|
|
28260
|
+
case when sum(tblpages) > 0
|
|
28261
|
+
then 100 * sum(greatest(tblpages - est_tblpages, 0))::float / sum(tblpages)
|
|
28262
|
+
else 0
|
|
28263
|
+
end::float as extra_pct,
|
|
28264
|
+
coalesce(avg(fillfactor), 100)::smallint as fillfactor,
|
|
28265
|
+
coalesce(sum(bloat_size), 0)::int8 as bloat_size,
|
|
28266
|
+
case when sum(tblpages) > 0
|
|
28267
|
+
then 100 * sum(greatest(tblpages - est_tblpages_ff, 0))::float / sum(tblpages)
|
|
28268
|
+
else 0
|
|
28269
|
+
end::float as bloat_pct,
|
|
28270
|
+
coalesce(max(is_na), 0)::int as is_na
|
|
28271
|
+
from ranked
|
|
28272
|
+
where rownum > 100
|
|
28273
|
+
group by ()
|
|
28274
|
+
having count(*) > 0
|
|
28126
28275
|
`
|
|
28127
28276
|
},
|
|
28128
28277
|
gauges: ["real_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
|
|
28129
28278
|
statement_timeout_seconds: 300
|
|
28130
28279
|
},
|
|
28131
28280
|
pg_btree_bloat: {
|
|
28132
|
-
description: "
|
|
28281
|
+
description: "Estimated per-btree-index bloat (index pages allocated vs index pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262. Ranks by `bloat_pct` descending with `is_na = 0` preferred and stable schema/table/idx tiebreakers. Preserves the existing >1 MiB filter. Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size; recompute extra_pct and bloat_pct from sum(relpages-est_pages) / sum(relpages) (weighted avg over the tail); avg(fillfactor); max(is_na); table_size_mib doesn't aggregate meaningfully across indexes on different tables, so the `'$other$'` row reports 0. The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
|
|
28133
28282
|
sqls: {
|
|
28134
|
-
11: `
|
|
28135
|
-
|
|
28136
|
-
|
|
28137
|
-
|
|
28138
|
-
|
|
28139
|
-
|
|
28140
|
-
|
|
28141
|
-
|
|
28142
|
-
|
|
28143
|
-
|
|
28144
|
-
|
|
28145
|
-
|
|
28146
|
-
|
|
28147
|
-
|
|
28148
|
-
|
|
28283
|
+
11: `with bloat_data as ( /* pgwatch_generated */
|
|
28284
|
+
select
|
|
28285
|
+
nspname, tblname, idxname,
|
|
28286
|
+
(bs*(relpages)/(1024*1024))::float as real_size_mib,
|
|
28287
|
+
(pg_relation_size(tbloid)/(1024*1024))::float as table_size_mib,
|
|
28288
|
+
(bs*(relpages-est_pages))::float as extra_size,
|
|
28289
|
+
100 * (relpages-est_pages)::float / relpages as extra_pct,
|
|
28290
|
+
fillfactor,
|
|
28291
|
+
case when relpages > est_pages_ff
|
|
28292
|
+
then bs*(relpages-est_pages_ff)
|
|
28293
|
+
else 0
|
|
28294
|
+
end as bloat_size,
|
|
28295
|
+
100 * (relpages-est_pages_ff)::float / relpages as bloat_pct,
|
|
28296
|
+
is_na,
|
|
28297
|
+
-- carried for the $other$ aggregate denominators
|
|
28298
|
+
bs, relpages, est_pages, est_pages_ff
|
|
28299
|
+
from (
|
|
28149
28300
|
select coalesce(1 +
|
|
28150
28301
|
ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
|
|
28151
28302
|
) as est_pages,
|
|
@@ -28232,8 +28383,55 @@ from (
|
|
|
28232
28383
|
) as rows_hdr_pdg_stats
|
|
28233
28384
|
) as relation_stats
|
|
28234
28385
|
where (bs * relpages::float / (1024 * 1024)) > 1 /* exclude indexes below 1 MiB */
|
|
28235
|
-
|
|
28236
|
-
|
|
28386
|
+
),
|
|
28387
|
+
ranked as (
|
|
28388
|
+
select
|
|
28389
|
+
row_number() over (
|
|
28390
|
+
order by is_na = 0 desc, bloat_pct desc nulls last,
|
|
28391
|
+
nspname, tblname, idxname
|
|
28392
|
+
) as rownum,
|
|
28393
|
+
*
|
|
28394
|
+
from bloat_data
|
|
28395
|
+
)
|
|
28396
|
+
select
|
|
28397
|
+
current_database() as tag_datname,
|
|
28398
|
+
nspname as tag_schemaname,
|
|
28399
|
+
tblname as tag_tblname,
|
|
28400
|
+
idxname as tag_idxname,
|
|
28401
|
+
real_size_mib,
|
|
28402
|
+
table_size_mib,
|
|
28403
|
+
extra_size,
|
|
28404
|
+
extra_pct,
|
|
28405
|
+
fillfactor,
|
|
28406
|
+
bloat_size,
|
|
28407
|
+
bloat_pct,
|
|
28408
|
+
is_na
|
|
28409
|
+
from ranked
|
|
28410
|
+
where rownum <= 100
|
|
28411
|
+
union all
|
|
28412
|
+
select
|
|
28413
|
+
current_database() as tag_datname,
|
|
28414
|
+
'$other$'::text as tag_schemaname,
|
|
28415
|
+
'$other$'::text as tag_tblname,
|
|
28416
|
+
'$other$'::text as tag_idxname,
|
|
28417
|
+
coalesce(sum(real_size_mib), 0)::float as real_size_mib,
|
|
28418
|
+
0::float as table_size_mib,
|
|
28419
|
+
coalesce(sum(extra_size), 0)::float as extra_size,
|
|
28420
|
+
case when sum(relpages) > 0
|
|
28421
|
+
then 100 * sum(greatest(relpages - est_pages, 0))::float / sum(relpages)
|
|
28422
|
+
else 0
|
|
28423
|
+
end::float as extra_pct,
|
|
28424
|
+
coalesce(avg(fillfactor), 90)::smallint as fillfactor,
|
|
28425
|
+
coalesce(sum(bloat_size), 0)::float as bloat_size,
|
|
28426
|
+
case when sum(relpages) > 0
|
|
28427
|
+
then 100 * sum(greatest(relpages - est_pages_ff, 0))::float / sum(relpages)
|
|
28428
|
+
else 0
|
|
28429
|
+
end::float as bloat_pct,
|
|
28430
|
+
coalesce(max(is_na), 0)::int as is_na
|
|
28431
|
+
from ranked
|
|
28432
|
+
where rownum > 100
|
|
28433
|
+
group by ()
|
|
28434
|
+
having count(*) > 0
|
|
28237
28435
|
`
|
|
28238
28436
|
},
|
|
28239
28437
|
gauges: ["real_size_mib", "table_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
|
|
@@ -33353,6 +33551,35 @@ function stripMatchingQuotes(value) {
|
|
|
33353
33551
|
}
|
|
33354
33552
|
return trimmed;
|
|
33355
33553
|
}
|
|
33554
|
+
var REQUIRED_ENV_KEYS = [
|
|
33555
|
+
{ key: "REPLICATOR_PASSWORD", defaultValue: () => crypto2.randomBytes(32).toString("hex"), introducedIn: "0.13" },
|
|
33556
|
+
{ key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
|
|
33557
|
+
{ key: "VM_AUTH_PASSWORD", defaultValue: () => crypto2.randomBytes(18).toString("base64"), introducedIn: "0.15" }
|
|
33558
|
+
];
|
|
33559
|
+
function ensureRequiredEnvVars(projectDir) {
|
|
33560
|
+
const envFile = path7.resolve(projectDir, ".env");
|
|
33561
|
+
const existing = fs8.existsSync(envFile) ? fs8.readFileSync(envFile, "utf8") : "";
|
|
33562
|
+
const added = [];
|
|
33563
|
+
const appendLines = [];
|
|
33564
|
+
for (const spec of REQUIRED_ENV_KEYS) {
|
|
33565
|
+
const re = new RegExp(`^${spec.key}=`, "m");
|
|
33566
|
+
if (!re.test(existing)) {
|
|
33567
|
+
appendLines.push(`${spec.key}=${spec.defaultValue()}`);
|
|
33568
|
+
added.push(spec.key);
|
|
33569
|
+
}
|
|
33570
|
+
}
|
|
33571
|
+
if (appendLines.length === 0) {
|
|
33572
|
+
return added;
|
|
33573
|
+
}
|
|
33574
|
+
const needsTrailingNewline = existing.length > 0 && !existing.endsWith(`
|
|
33575
|
+
`);
|
|
33576
|
+
const newContent = existing + (needsTrailingNewline ? `
|
|
33577
|
+
` : "") + appendLines.join(`
|
|
33578
|
+
`) + `
|
|
33579
|
+
`;
|
|
33580
|
+
fs8.writeFileSync(envFile, newContent, { encoding: "utf8", mode: 384 });
|
|
33581
|
+
return added;
|
|
33582
|
+
}
|
|
33356
33583
|
async function execFilePromise(file, args) {
|
|
33357
33584
|
return new Promise((resolve8, reject) => {
|
|
33358
33585
|
childProcess.execFile(file, args, (error2, stdout, stderr) => {
|
|
@@ -35627,29 +35854,61 @@ Instances configuration:
|
|
|
35627
35854
|
console.log();
|
|
35628
35855
|
}
|
|
35629
35856
|
});
|
|
35630
|
-
mon.command("update-config").description("apply monitoring services configuration (generate sources)").action(async () => {
|
|
35857
|
+
mon.command("update-config").description("apply monitoring services configuration (generate sources, migrate .env)").action(async () => {
|
|
35858
|
+
let projectDir;
|
|
35859
|
+
try {
|
|
35860
|
+
({ projectDir } = await resolveOrInitPaths());
|
|
35861
|
+
} catch (error2) {
|
|
35862
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
35863
|
+
console.error(message);
|
|
35864
|
+
process.exitCode = 1;
|
|
35865
|
+
return;
|
|
35866
|
+
}
|
|
35867
|
+
const added = ensureRequiredEnvVars(projectDir);
|
|
35868
|
+
if (added.length > 0) {
|
|
35869
|
+
console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
|
|
35870
|
+
console.log(`(existing values were preserved; missing keys filled with safe defaults)
|
|
35871
|
+
`);
|
|
35872
|
+
}
|
|
35631
35873
|
const code = await runCompose(["run", "--rm", "sources-generator"]);
|
|
35632
35874
|
if (code !== 0)
|
|
35633
35875
|
process.exitCode = code;
|
|
35634
35876
|
});
|
|
35635
|
-
mon.command("update").description("update monitoring stack").action(async () => {
|
|
35877
|
+
mon.command("update").description("update monitoring stack (migrate .env, pull images)").action(async () => {
|
|
35636
35878
|
console.log(`Updating PostgresAI monitoring stack...
|
|
35637
35879
|
`);
|
|
35638
35880
|
try {
|
|
35639
|
-
|
|
35640
|
-
|
|
35641
|
-
|
|
35881
|
+
let projectDir;
|
|
35882
|
+
try {
|
|
35883
|
+
({ projectDir } = await resolveOrInitPaths());
|
|
35884
|
+
} catch (error2) {
|
|
35885
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
35886
|
+
console.error(message);
|
|
35642
35887
|
process.exitCode = 1;
|
|
35643
35888
|
return;
|
|
35644
35889
|
}
|
|
35645
|
-
console.log("
|
|
35646
|
-
|
|
35647
|
-
|
|
35648
|
-
|
|
35649
|
-
|
|
35650
|
-
|
|
35651
|
-
|
|
35652
|
-
|
|
35890
|
+
console.log("Checking .env for newly-required keys...");
|
|
35891
|
+
const added = ensureRequiredEnvVars(projectDir);
|
|
35892
|
+
if (added.length > 0) {
|
|
35893
|
+
console.log(`\u2713 Added missing .env keys: ${added.join(", ")}`);
|
|
35894
|
+
console.log(" (existing values preserved; missing keys filled with safe defaults)");
|
|
35895
|
+
} else {
|
|
35896
|
+
console.log("\u2713 .env is up to date");
|
|
35897
|
+
}
|
|
35898
|
+
console.log();
|
|
35899
|
+
const gitDir = path7.resolve(projectDir, ".git");
|
|
35900
|
+
if (fs8.existsSync(gitDir)) {
|
|
35901
|
+
console.log("Fetching latest changes...");
|
|
35902
|
+
await execFilePromise("git", ["fetch", "origin"]);
|
|
35903
|
+
const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
|
|
35904
|
+
const currentBranch = branch.trim();
|
|
35905
|
+
console.log(`Current branch: ${currentBranch}`);
|
|
35906
|
+
console.log("Pulling latest changes...");
|
|
35907
|
+
const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
|
|
35908
|
+
console.log(pullOut);
|
|
35909
|
+
} else {
|
|
35910
|
+
console.log("(not a git checkout \u2014 skipping git fetch/pull and going straight to image pull)");
|
|
35911
|
+
}
|
|
35653
35912
|
console.log(`
|
|
35654
35913
|
Updating Docker images...`);
|
|
35655
35914
|
const code = await runCompose(["pull"]);
|
|
@@ -57,17 +57,6 @@ export function getCheckupEntry(code: string): CheckupDictionaryEntry | null {
|
|
|
57
57
|
return dictionaryByCode.get(code.toUpperCase()) ?? null;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
-
/**
|
|
61
|
-
* Get the title for a checkup code.
|
|
62
|
-
*
|
|
63
|
-
* @param code - The check code (e.g., "A001", "H002")
|
|
64
|
-
* @returns The title or the code itself if not found
|
|
65
|
-
*/
|
|
66
|
-
export function getCheckupTitle(code: string): string {
|
|
67
|
-
const entry = getCheckupEntry(code);
|
|
68
|
-
return entry?.title ?? code;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
60
|
/**
|
|
72
61
|
* Check if a code exists in the dictionary.
|
|
73
62
|
*
|
package/lib/checkup.ts
CHANGED
|
@@ -2,41 +2,41 @@
|
|
|
2
2
|
* Express Checkup Module
|
|
3
3
|
* ======================
|
|
4
4
|
* Generates JSON health check reports directly from PostgreSQL without Prometheus.
|
|
5
|
-
*
|
|
5
|
+
*
|
|
6
6
|
* ARCHITECTURAL DECISIONS
|
|
7
7
|
* -----------------------
|
|
8
|
-
*
|
|
8
|
+
*
|
|
9
9
|
* 1. SINGLE SOURCE OF TRUTH FOR SQL QUERIES
|
|
10
|
-
* Complex metrics (index health, settings, db_stats) are loaded from
|
|
10
|
+
* Complex metrics (index health, settings, db_stats) are loaded from
|
|
11
11
|
* config/pgwatch-prometheus/metrics.yml via getMetricSql() from metrics-loader.ts.
|
|
12
|
-
*
|
|
12
|
+
*
|
|
13
13
|
* Simple queries (version, database list, connection states, uptime) use
|
|
14
14
|
* inline SQL as they're trivial and CLI-specific.
|
|
15
|
-
*
|
|
15
|
+
*
|
|
16
16
|
* 2. JSON SCHEMA COMPLIANCE
|
|
17
17
|
* All generated reports MUST comply with JSON schemas in reporter/schemas/.
|
|
18
18
|
* These schemas define the expected format for both:
|
|
19
19
|
* - Full-fledged monitoring reporter output
|
|
20
20
|
* - Express checkup output
|
|
21
|
-
*
|
|
21
|
+
*
|
|
22
22
|
* Before adding or modifying a report, verify the corresponding schema exists
|
|
23
23
|
* and ensure the output matches. Run schema validation tests to confirm.
|
|
24
|
-
*
|
|
24
|
+
*
|
|
25
25
|
* 3. ERROR HANDLING STRATEGY
|
|
26
26
|
* Functions follow two patterns based on criticality:
|
|
27
|
-
*
|
|
27
|
+
*
|
|
28
28
|
* PROPAGATING (throws on error):
|
|
29
29
|
* - Core data functions: getPostgresVersion, getSettings, getAlteredSettings,
|
|
30
30
|
* getDatabaseSizes, getInvalidIndexes, getUnusedIndexes, getRedundantIndexes
|
|
31
31
|
* - If these fail, the entire report should fail (data is required)
|
|
32
32
|
* - Callers should handle errors at the report generation level
|
|
33
|
-
*
|
|
33
|
+
*
|
|
34
34
|
* GRACEFUL DEGRADATION (catches errors, includes error in output):
|
|
35
35
|
* - Optional/supplementary queries: pg_stat_statements, pg_stat_kcache checks,
|
|
36
36
|
* memory calculations, postmaster startup time
|
|
37
37
|
* - These are nice-to-have; missing data shouldn't fail the whole report
|
|
38
38
|
* - Errors are logged and included in report output for visibility
|
|
39
|
-
*
|
|
39
|
+
*
|
|
40
40
|
* ADDING NEW REPORTS
|
|
41
41
|
* ------------------
|
|
42
42
|
* 1. Add/verify the metric exists in config/pgwatch-prometheus/metrics.yml
|
|
@@ -51,7 +51,7 @@ import * as fs from "fs";
|
|
|
51
51
|
import * as path from "path";
|
|
52
52
|
import * as pkg from "../package.json";
|
|
53
53
|
import { getMetricSql, transformMetricRow, METRIC_NAMES } from "./metrics-loader";
|
|
54
|
-
import {
|
|
54
|
+
import { buildCheckInfoMap } from "./checkup-dictionary";
|
|
55
55
|
|
|
56
56
|
// Time constants
|
|
57
57
|
const SECONDS_PER_DAY = 86400;
|
|
@@ -336,7 +336,7 @@ export function parseVersionNum(versionNum: string): { major: string; minor: str
|
|
|
336
336
|
/**
|
|
337
337
|
* Format bytes to human readable string using binary units (1024-based).
|
|
338
338
|
* Uses IEC standard: KiB, MiB, GiB, etc.
|
|
339
|
-
*
|
|
339
|
+
*
|
|
340
340
|
* Note: PostgreSQL's pg_size_pretty() uses kB/MB/GB with 1024 base (technically
|
|
341
341
|
* incorrect SI usage), but we follow IEC binary units per project style guide.
|
|
342
342
|
*/
|
|
@@ -387,7 +387,7 @@ function formatSettingPrettyValue(
|
|
|
387
387
|
/**
|
|
388
388
|
* Get PostgreSQL version information.
|
|
389
389
|
* Uses simple inline SQL (trivial query, CLI-specific).
|
|
390
|
-
*
|
|
390
|
+
*
|
|
391
391
|
* @throws {Error} If database query fails (propagating - critical data)
|
|
392
392
|
*/
|
|
393
393
|
export async function getPostgresVersion(client: Client): Promise<PostgresVersion> {
|
|
@@ -1084,7 +1084,7 @@ export const generateH004 = (client: Client, nodeName = "node-01") =>
|
|
|
1084
1084
|
|
|
1085
1085
|
/**
|
|
1086
1086
|
* Generate D004 report - pg_stat_statements and pg_stat_kcache settings.
|
|
1087
|
-
*
|
|
1087
|
+
*
|
|
1088
1088
|
* Uses graceful degradation: extension queries are wrapped in try-catch
|
|
1089
1089
|
* because extensions may not be installed. Errors are included in the
|
|
1090
1090
|
* report output rather than failing the entire report.
|
package/lib/init.ts
CHANGED
|
@@ -87,7 +87,7 @@ export type AdminConnection = {
|
|
|
87
87
|
/**
|
|
88
88
|
* Check if an error indicates SSL negotiation failed and fallback to non-SSL should be attempted.
|
|
89
89
|
* This mimics libpq's sslmode=prefer behavior.
|
|
90
|
-
*
|
|
90
|
+
*
|
|
91
91
|
* IMPORTANT: This should NOT match certificate errors (expired, invalid, self-signed)
|
|
92
92
|
* as those are real errors the user needs to fix, not negotiation failures.
|
|
93
93
|
*/
|
package/lib/metrics-loader.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Metrics loader for express checkup reports
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
4
|
* Loads SQL queries from embedded metrics data (generated from metrics.yml at build time).
|
|
5
5
|
* Provides version-aware query selection and row transformation utilities.
|
|
6
6
|
*/
|
|
@@ -9,7 +9,7 @@ import { METRICS, MetricDefinition } from "./metrics-embedded";
|
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* Get SQL query for a specific metric, selecting the appropriate version.
|
|
12
|
-
*
|
|
12
|
+
*
|
|
13
13
|
* @param metricName - Name of the metric (e.g., "settings", "db_stats")
|
|
14
14
|
* @param pgMajorVersion - PostgreSQL major version (default: 16)
|
|
15
15
|
* @returns SQL query string
|
|
@@ -41,7 +41,7 @@ export function getMetricSql(metricName: string, pgMajorVersion: number = 16): s
|
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
43
|
* Get metric definition including all metadata.
|
|
44
|
-
*
|
|
44
|
+
*
|
|
45
45
|
* @param metricName - Name of the metric
|
|
46
46
|
* @returns MetricDefinition or undefined if not found
|
|
47
47
|
*/
|
package/package.json
CHANGED
package/test/upgrade.test.ts
CHANGED
|
@@ -420,3 +420,126 @@ describe("upgrade CLI commands", () => {
|
|
|
420
420
|
expect(stdout).toMatch(/health/i);
|
|
421
421
|
}, { timeout: TEST_TIMEOUT });
|
|
422
422
|
});
|
|
423
|
+
|
|
424
|
+
describe("in-place upgrade env migration (mon update / update-config)", () => {
|
|
425
|
+
/**
|
|
426
|
+
* Regression tests for the 0.14 -> 0.15 in-place upgrade gap (#203).
|
|
427
|
+
*
|
|
428
|
+
* Before this fix, a user who installed at 0.14 and ran the documented
|
|
429
|
+
* upgrade flow (`pgai mon update`) ended up with a .env file that lacked
|
|
430
|
+
* VM_AUTH_USERNAME / VM_AUTH_PASSWORD, so sink-prometheus exited with:
|
|
431
|
+
*
|
|
432
|
+
* fatal cannot read "/postgres_ai_configs/prometheus/prometheus.yml":
|
|
433
|
+
* cannot expand environment variables: missing "VM_AUTH_USERNAME" env var
|
|
434
|
+
*
|
|
435
|
+
* `mon update` and `mon update-config` now migrate .env additively before
|
|
436
|
+
* doing anything else.
|
|
437
|
+
*/
|
|
438
|
+
|
|
439
|
+
let tempDir: string;
|
|
440
|
+
|
|
441
|
+
beforeAll(() => {
|
|
442
|
+
tempDir = fs.mkdtempSync(resolve(os.tmpdir(), "pgai-upgrade-env-migration-"));
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
afterAll(() => {
|
|
446
|
+
if (tempDir && fs.existsSync(tempDir)) {
|
|
447
|
+
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
448
|
+
}
|
|
449
|
+
});
|
|
450
|
+
|
|
451
|
+
test("mon update-config appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
|
|
452
|
+
const testDir = resolve(tempDir, "update-config-0.14-env");
|
|
453
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
454
|
+
|
|
455
|
+
// 0.14-shaped .env: PGAI_TAG present, VM_AUTH_* absent.
|
|
456
|
+
fs.writeFileSync(resolve(testDir, ".env"), "PGAI_TAG=0.14.0\nGF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw\n");
|
|
457
|
+
fs.writeFileSync(resolve(testDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
|
|
458
|
+
fs.writeFileSync(resolve(testDir, "instances.yml"), "# instances\n");
|
|
459
|
+
|
|
460
|
+
// The compose run will fail (no Docker in CI), but env migration runs first.
|
|
461
|
+
runCliInDir(["mon", "update-config"], testDir, { PGAI_TAG: undefined });
|
|
462
|
+
|
|
463
|
+
const envContent = fs.readFileSync(resolve(testDir, ".env"), "utf8");
|
|
464
|
+
|
|
465
|
+
// Existing values must be preserved verbatim.
|
|
466
|
+
expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
|
|
467
|
+
expect(envContent).toMatch(/^GF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw$/m);
|
|
468
|
+
|
|
469
|
+
// New required keys must be appended (vmauth username + non-empty base64 password).
|
|
470
|
+
expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
|
|
471
|
+
expect(envContent).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);
|
|
472
|
+
|
|
473
|
+
// REPLICATOR_PASSWORD was introduced earlier and is also part of the contract.
|
|
474
|
+
expect(envContent).toMatch(/^REPLICATOR_PASSWORD=[a-f0-9]{64}$/m);
|
|
475
|
+
}, { timeout: TEST_TIMEOUT });
|
|
476
|
+
|
|
477
|
+
test("mon update appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
|
|
478
|
+
const testDir = resolve(tempDir, "update-0.14-env");
|
|
479
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
480
|
+
|
|
481
|
+
fs.writeFileSync(resolve(testDir, ".env"), "PGAI_TAG=0.14.0\n");
|
|
482
|
+
fs.writeFileSync(resolve(testDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
|
|
483
|
+
fs.writeFileSync(resolve(testDir, "instances.yml"), "# instances\n");
|
|
484
|
+
|
|
485
|
+
// mon update will fail (no Docker in CI, no git repo), but env migration runs first.
|
|
486
|
+
const result = runCliInDir(["mon", "update"], testDir, { PGAI_TAG: undefined });
|
|
487
|
+
|
|
488
|
+
const envContent = fs.readFileSync(resolve(testDir, ".env"), "utf8");
|
|
489
|
+
|
|
490
|
+
expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
|
|
491
|
+
expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
|
|
492
|
+
expect(envContent).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);
|
|
493
|
+
|
|
494
|
+
// The migration step should print what it added so the user can see it.
|
|
495
|
+
expect(result.stdout).toMatch(/Added missing \.env keys/);
|
|
496
|
+
expect(result.stdout).toMatch(/VM_AUTH_USERNAME/);
|
|
497
|
+
expect(result.stdout).toMatch(/VM_AUTH_PASSWORD/);
|
|
498
|
+
}, { timeout: TEST_TIMEOUT });
|
|
499
|
+
|
|
500
|
+
test("mon update preserves existing VM_AUTH_* values (no rotation)", () => {
|
|
501
|
+
const testDir = resolve(tempDir, "update-preserve-vm-auth");
|
|
502
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
503
|
+
|
|
504
|
+
// User already has VM auth configured (e.g. set up via rotate-vm-auth.sh).
|
|
505
|
+
fs.writeFileSync(
|
|
506
|
+
resolve(testDir, ".env"),
|
|
507
|
+
"PGAI_TAG=0.15.0\nVM_AUTH_USERNAME=custom-user\nVM_AUTH_PASSWORD=custom-pw-do-not-rotate\nREPLICATOR_PASSWORD=" +
|
|
508
|
+
"a".repeat(64) +
|
|
509
|
+
"\n",
|
|
510
|
+
);
|
|
511
|
+
fs.writeFileSync(resolve(testDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
|
|
512
|
+
fs.writeFileSync(resolve(testDir, "instances.yml"), "# instances\n");
|
|
513
|
+
|
|
514
|
+
const result = runCliInDir(["mon", "update"], testDir, { PGAI_TAG: undefined });
|
|
515
|
+
|
|
516
|
+
const envContent = fs.readFileSync(resolve(testDir, ".env"), "utf8");
|
|
517
|
+
|
|
518
|
+
expect(envContent).toMatch(/^VM_AUTH_USERNAME=custom-user$/m);
|
|
519
|
+
expect(envContent).toMatch(/^VM_AUTH_PASSWORD=custom-pw-do-not-rotate$/m);
|
|
520
|
+
expect(envContent).toMatch(/^REPLICATOR_PASSWORD=a{64}$/m);
|
|
521
|
+
|
|
522
|
+
// When nothing is missing, the migration step should say so.
|
|
523
|
+
expect(result.stdout).toMatch(/\.env is up to date/);
|
|
524
|
+
}, { timeout: TEST_TIMEOUT });
|
|
525
|
+
|
|
526
|
+
test("mon update-config handles a .env that doesn't end with a newline", () => {
|
|
527
|
+
const testDir = resolve(tempDir, "update-config-no-trailing-newline");
|
|
528
|
+
fs.mkdirSync(testDir, { recursive: true });
|
|
529
|
+
|
|
530
|
+
// No trailing newline - migration must add one before appending new keys
|
|
531
|
+
// or we'd produce e.g. `PGAI_TAG=0.14.0VM_AUTH_USERNAME=vmauth`.
|
|
532
|
+
fs.writeFileSync(resolve(testDir, ".env"), "PGAI_TAG=0.14.0");
|
|
533
|
+
fs.writeFileSync(resolve(testDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
|
|
534
|
+
fs.writeFileSync(resolve(testDir, "instances.yml"), "# instances\n");
|
|
535
|
+
|
|
536
|
+
runCliInDir(["mon", "update-config"], testDir, { PGAI_TAG: undefined });
|
|
537
|
+
|
|
538
|
+
const envContent = fs.readFileSync(resolve(testDir, ".env"), "utf8");
|
|
539
|
+
|
|
540
|
+
expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
|
|
541
|
+
expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
|
|
542
|
+
// No key should be glued onto the previous line.
|
|
543
|
+
expect(envContent).not.toMatch(/PGAI_TAG=0\.14\.0VM_AUTH_USERNAME/);
|
|
544
|
+
}, { timeout: TEST_TIMEOUT });
|
|
545
|
+
});
|