postgresai 0.15.0-rc.4 → 0.15.0-rc.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,71 @@ function stripMatchingQuotes(value: string): string {
74
74
  return trimmed;
75
75
  }
76
76
 
77
/**
 * One entry of the required-env-vars contract for the monitoring stack.
 *
 * Keys listed in `REQUIRED_ENV_KEYS` are required by the docker-compose stack
 * and must exist in `.env` for the stack to start cleanly. Each entry knows
 * how to mint a safe default if the key is missing.
 */
type EnvKeyDefault = {
  key: string;
  /** Factory for the value written on green-field installs / first upgrade. */
  defaultValue: () => string;
  /** Key was introduced in this CLI version - used in human-readable migration logs. */
  introducedIn: string;
};

/**
 * Keys that `ensureRequiredEnvVars` guarantees are present in `.env`.
 * Order matters: missing keys are appended (and reported) in this order.
 */
const REQUIRED_ENV_KEYS: EnvKeyDefault[] = [
  { key: "REPLICATOR_PASSWORD", defaultValue: () => crypto.randomBytes(32).toString("hex"), introducedIn: "0.13" },
  { key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
  { key: "VM_AUTH_PASSWORD", defaultValue: () => crypto.randomBytes(18).toString("base64"), introducedIn: "0.15" },
];

/**
 * Read `.env` (if present), append any required keys that are missing, write
 * the result back atomically with 0600 perms, and return the list of keys that
 * were added (in `REQUIRED_ENV_KEYS` order).
 *
 * Purely additive: existing content is preserved verbatim, including comments
 * and ordering. A key counts as present when any line defines it, with or
 * without leading whitespace, an `export ` prefix, or spaces around `=`.
 *
 * Idempotent: a second call is a no-op (no write at all) once all keys are
 * present.
 *
 * This is the spine of the in-place upgrade story: when a user upgrades from
 * a version that didn't require a key (e.g. 0.14, pre-VM-auth) to one that
 * does (0.15), the missing keys are appended so the next `docker compose up`
 * doesn't fail with `missing "<KEY>" env var`.
 *
 * @param projectDir Directory that contains (or should contain) `.env`.
 * @returns Names of the keys that were appended; empty when nothing changed.
 * @throws Propagates any `fs` error (e.g. EACCES) from reading or writing.
 */
function ensureRequiredEnvVars(projectDir: string): string[] {
  const envFile = path.resolve(projectDir, ".env");
  const envExists = fs.existsSync(envFile);
  const existing = envExists ? fs.readFileSync(envFile, "utf8") : "";

  const added: string[] = [];
  const appendLines: string[] = [];

  for (const spec of REQUIRED_ENV_KEYS) {
    // Escape the key defensively so a future key with regex metacharacters
    // cannot change the meaning of the pattern.
    const escapedKey = spec.key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Tolerate `  KEY=`, `export KEY=` and `KEY = value`; otherwise a value the
    // user already set would be shadowed by an appended generated default.
    const re = new RegExp(`^[ \\t]*(?:export[ \\t]+)?${escapedKey}[ \\t]*=`, "m");
    if (!re.test(existing)) {
      appendLines.push(`${spec.key}=${spec.defaultValue()}`);
      added.push(spec.key);
    }
  }

  if (appendLines.length === 0) {
    return added;
  }

  // Append (don't overwrite) so we preserve order and any comments the user
  // may have added to their .env. Make sure we have a trailing newline first.
  const needsTrailingNewline = existing.length > 0 && !existing.endsWith("\n");
  const newContent = existing + (needsTrailingNewline ? "\n" : "") + appendLines.join("\n") + "\n";

  // Write to a sibling temp file and rename over the target: a crash mid-write
  // must never leave a truncated .env that has lost the user's secrets.
  // Resolve symlinks first so a symlinked .env keeps pointing at its target.
  const target = envExists ? fs.realpathSync(envFile) : envFile;
  const tmpFile = `${target}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, newContent, { encoding: "utf8", mode: 0o600 });
    // `mode` is only honoured when the file is created (and is umask-filtered),
    // so enforce 0600 explicitly before the file becomes visible as .env.
    fs.chmodSync(tmpFile, 0o600);
    fs.renameSync(tmpFile, target);
  } catch (error) {
    try {
      fs.unlinkSync(tmpFile);
    } catch {
      // Best effort: the temp file may never have been created.
    }
    throw error;
  }

  return added;
}
141
+
77
142
  // Helper functions for spawning processes - use Node.js child_process for compatibility
78
143
  async function execFilePromise(file: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
79
144
  return new Promise((resolve, reject) => {
@@ -2970,41 +3035,83 @@ mon
2970
3035
  });
2971
3036
// `mon update-config`: migrate `.env`, then regenerate monitoring sources.
mon
  .command("update-config")
  .description("apply monitoring services configuration (generate sources, migrate .env)")
  .action(async () => {
    // Migrate .env first: append any required keys introduced by newer stack
    // versions (e.g. VM_AUTH_* added in 0.15). This is what makes in-place
    // upgrades from older deployments not break with `missing "VM_AUTH_USERNAME"
    // env var` when sink-prometheus boots.
    //
    // Path resolution and the migration share one guard: the migration does
    // synchronous fs I/O, and an unwritable .env must surface as a clean error
    // with a non-zero exit code rather than an unhandled promise rejection.
    let added: string[];
    try {
      const { projectDir } = await resolveOrInitPaths();
      added = ensureRequiredEnvVars(projectDir);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(message);
      process.exitCode = 1;
      return;
    }

    if (added.length > 0) {
      console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
      console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
    }

    // Regenerate sources; propagate a failing compose exit code to the shell.
    const code = await runCompose(["run", "--rm", "sources-generator"]);
    if (code !== 0) process.exitCode = code;
  });
2978
3063
  mon
2979
3064
  .command("update")
2980
- .description("update monitoring stack")
3065
+ .description("update monitoring stack (migrate .env, pull images)")
2981
3066
  .action(async () => {
2982
3067
  console.log("Updating PostgresAI monitoring stack...\n");
2983
3068
 
2984
3069
  try {
2985
- // Check if we're in a git repo
2986
- const gitDir = path.resolve(process.cwd(), ".git");
2987
- if (!fs.existsSync(gitDir)) {
2988
- console.error("Not a git repository. Cannot update.");
3070
+ let projectDir: string;
3071
+ try {
3072
+ ({ projectDir } = await resolveOrInitPaths());
3073
+ } catch (error) {
3074
+ const message = error instanceof Error ? error.message : String(error);
3075
+ console.error(message);
2989
3076
  process.exitCode = 1;
2990
3077
  return;
2991
3078
  }
2992
3079
 
2993
- // Fetch latest changes
2994
- console.log("Fetching latest changes...");
2995
- await execFilePromise("git", ["fetch", "origin"]);
2996
-
2997
- // Check current branch
2998
- const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
2999
- const currentBranch = branch.trim();
3000
- console.log(`Current branch: ${currentBranch}`);
3001
-
3002
- // Pull latest changes
3003
- console.log("Pulling latest changes...");
3004
- const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3005
- console.log(pullOut);
3080
+ // Step 1: migrate .env so newer stack versions that require additional
3081
+ // env vars (e.g. VM_AUTH_USERNAME / VM_AUTH_PASSWORD introduced in 0.15)
3082
+ // don't make `docker compose up` fail silently for users who installed
3083
+ // before those vars existed. Purely additive: existing values are kept.
3084
+ console.log("Checking .env for newly-required keys...");
3085
+ const added = ensureRequiredEnvVars(projectDir);
3086
+ if (added.length > 0) {
3087
+ console.log(`✓ Added missing .env keys: ${added.join(", ")}`);
3088
+ console.log(" (existing values preserved; missing keys filled with safe defaults)");
3089
+ } else {
3090
+ console.log(" .env is up to date");
3091
+ }
3092
+ console.log();
3093
+
3094
+ // Step 2: refresh repo if this is a git-based deployment. Some users
3095
+ // upgrade purely via `npm install -g postgresai@latest` and don't have a
3096
+ // git checkout - in that case we skip git operations and still do the
3097
+ // env migration + docker pull.
3098
+ const gitDir = path.resolve(projectDir, ".git");
3099
+ if (fs.existsSync(gitDir)) {
3100
+ console.log("Fetching latest changes...");
3101
+ await execFilePromise("git", ["fetch", "origin"]);
3102
+
3103
+ const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
3104
+ const currentBranch = branch.trim();
3105
+ console.log(`Current branch: ${currentBranch}`);
3106
+
3107
+ console.log("Pulling latest changes...");
3108
+ const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3109
+ console.log(pullOut);
3110
+ } else {
3111
+ console.log("(not a git checkout — skipping git fetch/pull and going straight to image pull)");
3112
+ }
3006
3113
 
3007
- // Update Docker images
3114
+ // Step 3: pull new images.
3008
3115
  console.log("\nUpdating Docker images...");
3009
3116
  const code = await runCompose(["pull"]);
3010
3117
 
@@ -13423,7 +13423,7 @@ var {
13423
13423
  // package.json
13424
13424
  var package_default = {
13425
13425
  name: "postgresai",
13426
- version: "0.15.0-rc.4",
13426
+ version: "0.15.0-rc.5",
13427
13427
  description: "postgres_ai CLI",
13428
13428
  license: "Apache-2.0",
13429
13429
  private: false,
@@ -16254,7 +16254,7 @@ var Result = import_lib.default.Result;
16254
16254
  var TypeOverrides = import_lib.default.TypeOverrides;
16255
16255
  var defaults = import_lib.default.defaults;
16256
16256
  // package.json
16257
- var version = "0.15.0-rc.4";
16257
+ var version = "0.15.0-rc.5";
16258
16258
  var package_default2 = {
16259
16259
  name: "postgresai",
16260
16260
  version,
@@ -27687,7 +27687,7 @@ where
27687
27687
  statement_timeout_seconds: 300
27688
27688
  },
27689
27689
  pg_invalid_indexes: {
27690
- description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact).",
27690
+ description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact). Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks invalid indexes by `index_size_bytes desc` (ties broken by schema, table, then index name for stability), keeps the top 100, and folds the tail into a single `'$other$'` row whose `index_size_bytes` / `table_row_estimate` are summed and whose tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when all invalid indexes fit within the top-100 cap, so its absence on healthy clusters is normal.",
27691
27691
  sqls: {
27692
27692
  11: `with fk_indexes as ( /* pgwatch_generated */
27693
27693
  select
@@ -27755,25 +27755,65 @@ data as (
27755
27755
  left join valid_duplicates vd on vd.invalid_indexrelid = pidx.indexrelid
27756
27756
  where pidx.indisvalid = false
27757
27757
  ),
27758
- num_data as (
27758
+ ranked as (
27759
27759
  select
27760
- row_number() over () as num,
27760
+ row_number() over (
27761
+ order by index_size_bytes desc nulls last,
27762
+ tag_schema_name, tag_table_name, tag_index_name
27763
+ ) as num,
27761
27764
  data.*
27762
27765
  from data
27763
27766
  )
27764
27767
  select
27765
27768
  (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
27766
27769
  current_database() as tag_datname,
27767
- num_data.*
27768
- from num_data
27769
- limit 1000;
27770
+ num,
27771
+ tag_index_name,
27772
+ tag_schema_name,
27773
+ tag_table_name,
27774
+ tag_relation_name,
27775
+ index_definition,
27776
+ index_size_bytes,
27777
+ is_pk,
27778
+ is_unique,
27779
+ constraint_name,
27780
+ table_row_estimate,
27781
+ has_valid_duplicate,
27782
+ valid_index_name,
27783
+ valid_index_definition,
27784
+ supports_fk
27785
+ from ranked
27786
+ where num <= 100
27787
+ union all
27788
+ select
27789
+ (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
27790
+ current_database() as tag_datname,
27791
+ 0::bigint as num,
27792
+ '$other$'::text as tag_index_name,
27793
+ '$other$'::text as tag_schema_name,
27794
+ '$other$'::text as tag_table_name,
27795
+ '$other$'::text as tag_relation_name,
27796
+ '$other$'::text as index_definition,
27797
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
27798
+ false as is_pk,
27799
+ false as is_unique,
27800
+ '$other$'::text as constraint_name,
27801
+ coalesce(sum(table_row_estimate), 0)::bigint as table_row_estimate,
27802
+ bool_or(has_valid_duplicate) as has_valid_duplicate,
27803
+ '$other$'::text as valid_index_name,
27804
+ '$other$'::text as valid_index_definition,
27805
+ coalesce(max(supports_fk), 0)::int as supports_fk
27806
+ from ranked
27807
+ where num > 100
27808
+ group by ()
27809
+ having count(*) > 0;
27770
27810
  `
27771
27811
  },
27772
27812
  gauges: ["*"],
27773
27813
  statement_timeout_seconds: 15
27774
27814
  },
27775
27815
  unused_indexes: {
27776
- description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance.",
27816
+ description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: within the `idx_scan = 0 AND idx_is_btree` filter, ranks indexes by `index_size_bytes desc` (ties broken by schema, table, index name), keeps the top 100, and folds the tail into a single `'$other$'` row. Counter columns (`idx_scan`, `all_scans`, `writes`, `index_size_bytes`, `table_size_bytes`, `relpages`) are summed across the tail; ratio columns (`index_scan_pct`, `scans_per_write`) and the `supports_fk` boolean are deliberately zeroed/false on the aggregate row because the tail-level average would mislead and the per-row FK relationship has no meaningful aggregate. Tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when ≤100 indexes match the unused filter.",
27777
27817
  sqls: {
27778
27818
  11: `with fk_indexes as ( /* pgwatch_generated */
27779
27819
  select
@@ -27852,6 +27892,17 @@ limit 1000;
27852
27892
  from indexes i
27853
27893
  join table_scans ts on ts.relid = i.indrelid
27854
27894
  )
27895
+ , ranked as (
27896
+ select
27897
+ row_number() over (
27898
+ order by index_size_bytes desc nulls last,
27899
+ schema_name, table_name, index_name
27900
+ ) as num,
27901
+ *
27902
+ from index_ratios
27903
+ where idx_scan = 0
27904
+ and idx_is_btree
27905
+ )
27855
27906
  select
27856
27907
  'Never Used Indexes' as tag_reason,
27857
27908
  current_database() as tag_datname,
@@ -27871,19 +27922,39 @@ select
27871
27922
  idx_is_btree,
27872
27923
  opclasses as tag_opclasses,
27873
27924
  supports_fk
27874
- from index_ratios
27875
- where
27876
- idx_scan = 0
27877
- and idx_is_btree
27878
- order by index_size_bytes desc
27879
- limit 1000;
27925
+ from ranked
27926
+ where num <= 100
27927
+ union all
27928
+ select
27929
+ 'Never Used Indexes' as tag_reason,
27930
+ current_database() as tag_datname,
27931
+ 0::oid as index_id,
27932
+ '$other$'::text as tag_schema_name,
27933
+ '$other$'::text as tag_table_name,
27934
+ '$other$'::text as tag_index_name,
27935
+ '$other$'::text as index_definition,
27936
+ coalesce(sum(idx_scan), 0)::int8 as idx_scan,
27937
+ coalesce(sum(all_scans), 0)::int8 as all_scans,
27938
+ 0::numeric as index_scan_pct,
27939
+ coalesce(sum(writes), 0)::int8 as writes,
27940
+ 0::numeric as scans_per_write,
27941
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
27942
+ coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
27943
+ coalesce(sum(relpages), 0)::int4 as relpages,
27944
+ true as idx_is_btree,
27945
+ '$other$'::text as tag_opclasses,
27946
+ false as supports_fk
27947
+ from ranked
27948
+ where num > 100
27949
+ group by ()
27950
+ having count(*) > 0;
27880
27951
  `
27881
27952
  },
27882
27953
  gauges: ["*"],
27883
27954
  statement_timeout_seconds: 15
27884
27955
  },
27885
27956
  redundant_indexes: {
27886
- description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds.",
27957
+ description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks redundant indexes by `index_size_bytes desc` (ties broken by `table_name`), keeps the top 100, and folds the tail into a single `'$other$'` row whose `table_size_bytes`, `index_size_bytes` and `index_usage` columns are summed and whose tag columns carry the literal `'$other$'` sentinel. The `redundant_indexes_grouped` CTE intentionally preserves duplicate column aliases (`tag_schema_name` / `tag_index_name` appear twice — once from the raw name and once from the `formated_*` variant) because the dashboards rely on both spellings; the duplication is preserved on the `'$other$'` row for consistency. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when there are ≤100 redundant pairs, so its absence on healthy clusters is normal.",
27887
27958
  sqls: {
27888
27959
  11: `with fk_indexes as ( /* pgwatch_generated */
27889
27960
  select
@@ -28035,9 +28106,43 @@ redundant_indexes_tmp_num as (
28035
28106
  formated_relation_name,
28036
28107
  supports_fk
28037
28108
  order by index_size_bytes desc
28109
+ ),
28110
+ -- redundant_indexes_grouped intentionally exposes duplicate aliases
28111
+ -- (tag_schema_name / tag_index_name appear twice — once from the
28112
+ -- raw name and once from the formated_* variant). select * over it
28113
+ -- preserves both. Order by table_name (unique, non-duplicated).
28114
+ ranked as (
28115
+ select
28116
+ row_number() over (
28117
+ order by index_size_bytes desc nulls last, table_name
28118
+ ) as num,
28119
+ redundant_indexes_grouped.*
28120
+ from redundant_indexes_grouped
28038
28121
  )
28039
- select * from redundant_indexes_grouped
28040
- limit 1000;
28122
+ select * from ranked where num <= 100
28123
+ union all
28124
+ select
28125
+ 0::bigint as num,
28126
+ 0::oid as index_id,
28127
+ '$other$'::text as tag_schema_name,
28128
+ '$other$'::text as table_name,
28129
+ coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
28130
+ '$other$'::text as tag_index_name,
28131
+ '$other$'::text as tag_access_method,
28132
+ '$other$'::text as tag_reason,
28133
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
28134
+ coalesce(sum(index_usage), 0)::int8 as index_usage,
28135
+ '$other$'::text as index_definition,
28136
+ '$other$'::text as tag_index_name,
28137
+ '$other$'::text as tag_schema_name,
28138
+ '$other$'::text as tag_table_name,
28139
+ '$other$'::text as tag_relation_name,
28140
+ coalesce(max(supports_fk), 0)::int as supports_fk,
28141
+ '$other$'::text as redundant_to_json
28142
+ from ranked
28143
+ where num > 100
28144
+ group by ()
28145
+ having count(*) > 0;
28041
28146
  `
28042
28147
  },
28043
28148
  gauges: ["*"],
@@ -28059,93 +28164,139 @@ where datname = current_database()
28059
28164
  statement_timeout_seconds: 15
28060
28165
  },
28061
28166
  pg_table_bloat: {
28062
- description: "This metric analyzes estimated table bloat by calculating the estimated vs actual table pages and sizes. It provides insights into estimated bloat percentage, real size, extra size due to estimated bloat, and estimated bloat size considering fill factor. This metric helps administrators identify tables that may need maintenance like VACUUM FULL or table reorganization.",
28167
+ description: "Estimated per-table bloat (heap pages allocated vs heap pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262: everything below the cap is summed into a single `'$other$'` row so dashboard \"total bloat across the DB\" stays correct even when the tail is large. Ranks by `bloat_pct` descending (most-bloated tables first), with `is_na = 0` preferred (don't crowd top-N with tables whose estimate is unreliable) and stable schemaname/tblname tiebreakers. Preserves the existing >1 MiB filter (zero-byte and tiny tables aren't interesting for bloat). Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size (total wasted bytes in the tail); recompute extra_pct and bloat_pct from the summed numerator/denominator (weighted-avg effectively); avg(fillfactor); max(is_na) (any tail row with bad stats taints the aggregate). The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
28063
28168
  sqls: {
28064
- 11: `select current_database() as tag_datname, schemaname as tag_schemaname, tblname as tag_tblname, (bs*tblpages)/(1024*1024)::float as real_size_mib, /* pgwatch_generated */
28065
- (tblpages-est_tblpages)*bs as extra_size,
28066
- case when tblpages > 0 and tblpages - est_tblpages > 0
28067
- then 100 * (tblpages - est_tblpages)/tblpages::float
28068
- else 0
28069
- end as extra_pct, fillfactor,
28070
- case when tblpages - est_tblpages_ff > 0
28071
- then (tblpages-est_tblpages_ff)*bs
28072
- else 0
28073
- end as bloat_size,
28074
- case when tblpages > 0 and tblpages - est_tblpages_ff > 0
28075
- then 100 * (tblpages - est_tblpages_ff)/tblpages::float
28076
- else 0
28077
- end as bloat_pct, is_na
28078
- -- , tpl_hdr_size, tpl_data_size, (pst).free_percent + (pst).dead_tuple_percent as real_frag -- (DEBUG INFO)
28079
- from (
28080
- select ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages,
28081
- ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff,
28082
- tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
28083
- -- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) as pst -- (DEBUG INFO)
28169
+ 11: `with bloat_data as ( /* pgwatch_generated */
28170
+ select schemaname, tblname,
28171
+ (bs*tblpages)/(1024*1024)::float as real_size_mib,
28172
+ (tblpages-est_tblpages)*bs as extra_size,
28173
+ case when tblpages > 0 and tblpages - est_tblpages > 0
28174
+ then 100 * (tblpages - est_tblpages)/tblpages::float
28175
+ else 0
28176
+ end as extra_pct,
28177
+ fillfactor,
28178
+ case when tblpages - est_tblpages_ff > 0
28179
+ then (tblpages-est_tblpages_ff)*bs
28180
+ else 0
28181
+ end as bloat_size,
28182
+ case when tblpages > 0 and tblpages - est_tblpages_ff > 0
28183
+ then 100 * (tblpages - est_tblpages_ff)/tblpages::float
28184
+ else 0
28185
+ end as bloat_pct,
28186
+ is_na,
28187
+ -- carried for the $other$ aggregate denominators
28188
+ bs, tblpages, est_tblpages, est_tblpages_ff
28084
28189
  from (
28085
- select
28086
- ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
28087
- - case when tpl_hdr_size%ma = 0 then ma else tpl_hdr_size%ma end
28088
- - case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
28089
- ) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
28090
- toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
28091
- -- , tpl_hdr_size, tpl_data_size
28190
+ select ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages,
28191
+ ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff,
28192
+ tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
28092
28193
  from (
28093
28194
  select
28094
- tbl.oid as tblid, ns.nspname as schemaname, tbl.relname as tblname, tbl.reltuples,
28095
- tbl.relpages as heappages, coalesce(toast.relpages, 0) as toastpages,
28096
- coalesce(toast.reltuples, 0) as toasttuples,
28097
- coalesce(substring(
28098
- array_to_string(tbl.reloptions, ' ')
28099
- from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor,
28100
- current_setting('block_size')::numeric as bs,
28101
- case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma,
28102
- 24 as page_hdr,
28103
- 23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end
28104
- + case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size,
28105
- sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size,
28106
- (bool_or(att.atttypid = 'pg_catalog.name'::regtype)
28107
- or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na
28108
- from pg_attribute as att
28109
- join pg_class as tbl on att.attrelid = tbl.oid
28110
- join pg_namespace as ns on ns.oid = tbl.relnamespace
28111
- left join postgres_ai.pg_statistic as s on s.schemaname=ns.nspname
28112
- and s.tablename = tbl.relname and s.inherited=false and s.attname=att.attname
28113
- left join pg_class as toast on tbl.reltoastrelid = toast.oid
28114
- where not att.attisdropped
28115
- and tbl.relkind in ('r','m')
28116
- group by 1,2,3,4,5,6,7,8,9,10
28117
- order by 2,3
28118
- ) as s
28119
- ) as s2
28120
- ) as s3
28121
- -- where not is_na
28122
- -- and tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1
28123
- where (bs * tblpages::float / (1024 * 1024)) > 1 /* exclude tables below 1 MiB */
28124
- order by is_na = 0 desc, bloat_pct desc
28125
- limit 1000
28195
+ ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
28196
+ - case when tpl_hdr_size%ma = 0 then ma else tpl_hdr_size%ma end
28197
+ - case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
28198
+ ) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
28199
+ toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
28200
+ from (
28201
+ select
28202
+ tbl.oid as tblid, ns.nspname as schemaname, tbl.relname as tblname, tbl.reltuples,
28203
+ tbl.relpages as heappages, coalesce(toast.relpages, 0) as toastpages,
28204
+ coalesce(toast.reltuples, 0) as toasttuples,
28205
+ coalesce(substring(
28206
+ array_to_string(tbl.reloptions, ' ')
28207
+ from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor,
28208
+ current_setting('block_size')::numeric as bs,
28209
+ case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma,
28210
+ 24 as page_hdr,
28211
+ 23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end
28212
+ + case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size,
28213
+ sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size,
28214
+ (bool_or(att.atttypid = 'pg_catalog.name'::regtype)
28215
+ or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na
28216
+ from pg_attribute as att
28217
+ join pg_class as tbl on att.attrelid = tbl.oid
28218
+ join pg_namespace as ns on ns.oid = tbl.relnamespace
28219
+ left join postgres_ai.pg_statistic as s on s.schemaname=ns.nspname
28220
+ and s.tablename = tbl.relname and s.inherited=false and s.attname=att.attname
28221
+ left join pg_class as toast on tbl.reltoastrelid = toast.oid
28222
+ where not att.attisdropped
28223
+ and tbl.relkind in ('r','m')
28224
+ group by 1,2,3,4,5,6,7,8,9,10
28225
+ order by 2,3
28226
+ ) as s
28227
+ ) as s2
28228
+ ) as s3
28229
+ where (bs * tblpages::float / (1024 * 1024)) > 1 /* exclude tables below 1 MiB */
28230
+ ),
28231
+ ranked as (
28232
+ select
28233
+ row_number() over (
28234
+ order by is_na = 0 desc, bloat_pct desc nulls last,
28235
+ schemaname, tblname
28236
+ ) as rownum,
28237
+ *
28238
+ from bloat_data
28239
+ )
28240
+ select
28241
+ current_database() as tag_datname,
28242
+ schemaname as tag_schemaname,
28243
+ tblname as tag_tblname,
28244
+ real_size_mib,
28245
+ extra_size,
28246
+ extra_pct,
28247
+ fillfactor,
28248
+ bloat_size,
28249
+ bloat_pct,
28250
+ is_na
28251
+ from ranked
28252
+ where rownum <= 100
28253
+ union all
28254
+ select
28255
+ current_database() as tag_datname,
28256
+ '$other$'::text as tag_schemaname,
28257
+ '$other$'::text as tag_tblname,
28258
+ coalesce(sum(real_size_mib), 0)::float as real_size_mib,
28259
+ coalesce(sum(extra_size), 0)::int8 as extra_size,
28260
+ case when sum(tblpages) > 0
28261
+ then 100 * sum(greatest(tblpages - est_tblpages, 0))::float / sum(tblpages)
28262
+ else 0
28263
+ end::float as extra_pct,
28264
+ coalesce(avg(fillfactor), 100)::smallint as fillfactor,
28265
+ coalesce(sum(bloat_size), 0)::int8 as bloat_size,
28266
+ case when sum(tblpages) > 0
28267
+ then 100 * sum(greatest(tblpages - est_tblpages_ff, 0))::float / sum(tblpages)
28268
+ else 0
28269
+ end::float as bloat_pct,
28270
+ coalesce(max(is_na), 0)::int as is_na
28271
+ from ranked
28272
+ where rownum > 100
28273
+ group by ()
28274
+ having count(*) > 0
28126
28275
  `
28127
28276
  },
28128
28277
  gauges: ["real_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
28129
28278
  statement_timeout_seconds: 300
28130
28279
  },
28131
28280
  pg_btree_bloat: {
28132
- description: "This metric analyzes estimated index bloat by calculating the estimated vs actual index pages and sizes. It provides insights into estimated bloat percentage, real size, extra size due to estimated bloat, and estimated bloat size considering fill factor. This metric helps administrators identify indexes that may need maintenance like VACUUM FULL or index reorganization.",
28281
+ description: "Estimated per-btree-index bloat (index pages allocated vs index pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262. Ranks by `bloat_pct` descending with `is_na = 0` preferred and stable schema/table/idx tiebreakers. Preserves the existing >1 MiB filter. Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size; recompute extra_pct and bloat_pct from sum(relpages-est_pages) / sum(relpages) (weighted avg over the tail); avg(fillfactor); max(is_na); table_size_mib doesn't aggregate meaningfully across indexes on different tables, so the `'$other$'` row reports 0. The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
28133
28282
  sqls: {
28134
- 11: `select /* pgwatch_generated */
28135
- current_database() as tag_datname, nspname as tag_schemaname, tblname as tag_tblname, idxname as tag_idxname,
28136
- (bs*(relpages)/(1024*1024))::float as real_size_mib,
28137
- (pg_relation_size(tbloid)/(1024*1024))::float as table_size_mib,
28138
- (bs*(relpages-est_pages))::float as extra_size,
28139
- 100 * (relpages-est_pages)::float / relpages as extra_pct,
28140
- fillfactor,
28141
- case when relpages > est_pages_ff
28142
- then bs*(relpages-est_pages_ff)
28143
- else 0
28144
- end as bloat_size,
28145
- 100 * (relpages-est_pages_ff)::float / relpages as bloat_pct,
28146
- is_na
28147
- -- , 100-(pst).avg_leaf_density as pst_avg_bloat, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples, relpages -- (DEBUG INFO)
28148
- from (
28283
+ 11: `with bloat_data as ( /* pgwatch_generated */
28284
+ select
28285
+ nspname, tblname, idxname,
28286
+ (bs*(relpages)/(1024*1024))::float as real_size_mib,
28287
+ (pg_relation_size(tbloid)/(1024*1024))::float as table_size_mib,
28288
+ (bs*(relpages-est_pages))::float as extra_size,
28289
+ 100 * (relpages-est_pages)::float / relpages as extra_pct,
28290
+ fillfactor,
28291
+ case when relpages > est_pages_ff
28292
+ then bs*(relpages-est_pages_ff)
28293
+ else 0
28294
+ end as bloat_size,
28295
+ 100 * (relpages-est_pages_ff)::float / relpages as bloat_pct,
28296
+ is_na,
28297
+ -- carried for the $other$ aggregate denominators
28298
+ bs, relpages, est_pages, est_pages_ff
28299
+ from (
28149
28300
  select coalesce(1 +
28150
28301
  ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
28151
28302
  ) as est_pages,
@@ -28232,8 +28383,55 @@ from (
28232
28383
  ) as rows_hdr_pdg_stats
28233
28384
  ) as relation_stats
28234
28385
  where (bs * relpages::float / (1024 * 1024)) > 1 /* exclude indexes below 1 MiB */
28235
- order by is_na = 0 desc, bloat_pct desc
28236
- limit 1000
28386
+ ),
28387
+ ranked as (
28388
+ select
28389
+ row_number() over (
28390
+ order by is_na = 0 desc, bloat_pct desc nulls last,
28391
+ nspname, tblname, idxname
28392
+ ) as rownum,
28393
+ *
28394
+ from bloat_data
28395
+ )
28396
+ select
28397
+ current_database() as tag_datname,
28398
+ nspname as tag_schemaname,
28399
+ tblname as tag_tblname,
28400
+ idxname as tag_idxname,
28401
+ real_size_mib,
28402
+ table_size_mib,
28403
+ extra_size,
28404
+ extra_pct,
28405
+ fillfactor,
28406
+ bloat_size,
28407
+ bloat_pct,
28408
+ is_na
28409
+ from ranked
28410
+ where rownum <= 100
28411
+ union all
28412
+ select
28413
+ current_database() as tag_datname,
28414
+ '$other$'::text as tag_schemaname,
28415
+ '$other$'::text as tag_tblname,
28416
+ '$other$'::text as tag_idxname,
28417
+ coalesce(sum(real_size_mib), 0)::float as real_size_mib,
28418
+ 0::float as table_size_mib,
28419
+ coalesce(sum(extra_size), 0)::float as extra_size,
28420
+ case when sum(relpages) > 0
28421
+ then 100 * sum(greatest(relpages - est_pages, 0))::float / sum(relpages)
28422
+ else 0
28423
+ end::float as extra_pct,
28424
+ coalesce(avg(fillfactor), 90)::smallint as fillfactor,
28425
+ coalesce(sum(bloat_size), 0)::float as bloat_size,
28426
+ case when sum(relpages) > 0
28427
+ then 100 * sum(greatest(relpages - est_pages_ff, 0))::float / sum(relpages)
28428
+ else 0
28429
+ end::float as bloat_pct,
28430
+ coalesce(max(is_na), 0)::int as is_na
28431
+ from ranked
28432
+ where rownum > 100
28433
+ group by ()
28434
+ having count(*) > 0
28237
28435
  `
28238
28436
  },
28239
28437
  gauges: ["real_size_mib", "table_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
@@ -33353,6 +33551,35 @@ function stripMatchingQuotes(value) {
33353
33551
  }
33354
33552
  return trimmed;
33355
33553
  }
33554
/**
 * Env keys that must exist in the project's `.env`, in the order they are
 * appended when missing.
 *
 * - `key`: variable name as it appears in `.env`.
 * - `defaultValue`: factory invoked only when the key is absent; secrets are
 *   generated with `crypto.randomBytes`, so each install gets its own value.
 * - `introducedIn`: CLI version that first required the key (informational).
 */
var REQUIRED_ENV_KEYS = [
  { key: "REPLICATOR_PASSWORD", defaultValue: () => crypto2.randomBytes(32).toString("hex"), introducedIn: "0.13" },
  { key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
  { key: "VM_AUTH_PASSWORD", defaultValue: () => crypto2.randomBytes(18).toString("base64"), introducedIn: "0.15" }
];

/**
 * Additively migrate `<projectDir>/.env`: append a generated default for every
 * key in REQUIRED_ENV_KEYS that has no assignment yet. Existing content is
 * never rewritten; new lines are only appended at the end.
 *
 * @param projectDir Directory that contains (or should contain) the `.env` file.
 * @returns Names of the keys that were appended, in REQUIRED_ENV_KEYS order;
 *          an empty array when nothing was missing (the file is then untouched).
 * @throws Whatever `fs.readFileSync` / `fs.writeFileSync` throw (e.g. EACCES).
 */
function ensureRequiredEnvVars(projectDir) {
  const envFile = path7.resolve(projectDir, ".env");
  const existing = fs8.existsSync(envFile) ? fs8.readFileSync(envFile, "utf8") : "";

  const missing = REQUIRED_ENV_KEYS.filter((spec) => {
    // Accept common dotenv spellings of an assignment (leading indentation, an
    // `export ` prefix, blanks before `=`) so a value the user already set in
    // one of those forms is not shadowed by an appended duplicate.
    const assignment = new RegExp(`^[ \\t]*(?:export[ \\t]+)?${spec.key}[ \\t]*=`, "m");
    return !assignment.test(existing);
  });
  const added = missing.map((spec) => spec.key);
  if (missing.length === 0) {
    return added;
  }

  const appendLines = missing.map((spec) => `${spec.key}=${spec.defaultValue()}`);
  // Guard against gluing the first new key onto an unterminated last line.
  const separator = existing.length > 0 && !existing.endsWith("\n") ? "\n" : "";
  const newContent = `${existing}${separator}${appendLines.join("\n")}\n`;

  fs8.writeFileSync(envFile, newContent, { encoding: "utf8", mode: 0o600 });
  // The `mode` option above only takes effect when the file is created. The
  // file now holds generated secrets, so tighten a pre-existing file as well.
  try {
    fs8.chmodSync(envFile, 0o600);
  } catch (_err) {
    // Best effort: the keys were written successfully; a filesystem that
    // cannot change permissions should not fail the migration.
  }
  return added;
}
33356
33583
  async function execFilePromise(file, args) {
33357
33584
  return new Promise((resolve8, reject) => {
33358
33585
  childProcess.execFile(file, args, (error2, stdout, stderr) => {
@@ -35627,29 +35854,61 @@ Instances configuration:
35627
35854
  console.log();
35628
35855
  }
35629
35856
  });
35630
- mon.command("update-config").description("apply monitoring services configuration (generate sources)").action(async () => {
35857
// `mon update-config`: resolve the project directory, append any required
// `.env` keys that are missing, then run the `sources-generator` compose
// service. A non-zero compose exit code becomes the process exit code.
mon.command("update-config").description("apply monitoring services configuration (generate sources, migrate .env)").action(async () => {
  let added;
  try {
    const { projectDir } = await resolveOrInitPaths();
    // Migrate .env inside the guarded section: a filesystem failure (e.g. an
    // unreadable or unwritable .env) is reported as a plain message with exit
    // code 1 instead of escaping the action as an unhandled rejection.
    added = ensureRequiredEnvVars(projectDir);
  } catch (error2) {
    const message = error2 instanceof Error ? error2.message : String(error2);
    console.error(message);
    process.exitCode = 1;
    return;
  }
  if (added.length > 0) {
    console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
    console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
  }
  const code = await runCompose(["run", "--rm", "sources-generator"]);
  if (code !== 0) {
    process.exitCode = code;
  }
});
35635
- mon.command("update").description("update monitoring stack").action(async () => {
35877
+ mon.command("update").description("update monitoring stack (migrate .env, pull images)").action(async () => {
35636
35878
  console.log(`Updating PostgresAI monitoring stack...
35637
35879
  `);
35638
35880
  try {
35639
- const gitDir = path7.resolve(process.cwd(), ".git");
35640
- if (!fs8.existsSync(gitDir)) {
35641
- console.error("Not a git repository. Cannot update.");
35881
+ let projectDir;
35882
+ try {
35883
+ ({ projectDir } = await resolveOrInitPaths());
35884
+ } catch (error2) {
35885
+ const message = error2 instanceof Error ? error2.message : String(error2);
35886
+ console.error(message);
35642
35887
  process.exitCode = 1;
35643
35888
  return;
35644
35889
  }
35645
- console.log("Fetching latest changes...");
35646
- await execFilePromise("git", ["fetch", "origin"]);
35647
- const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
35648
- const currentBranch = branch.trim();
35649
- console.log(`Current branch: ${currentBranch}`);
35650
- console.log("Pulling latest changes...");
35651
- const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
35652
- console.log(pullOut);
35890
+ console.log("Checking .env for newly-required keys...");
35891
+ const added = ensureRequiredEnvVars(projectDir);
35892
+ if (added.length > 0) {
35893
+ console.log(`\u2713 Added missing .env keys: ${added.join(", ")}`);
35894
+ console.log(" (existing values preserved; missing keys filled with safe defaults)");
35895
+ } else {
35896
+ console.log("\u2713 .env is up to date");
35897
+ }
35898
+ console.log();
35899
+ const gitDir = path7.resolve(projectDir, ".git");
35900
+ if (fs8.existsSync(gitDir)) {
35901
+ console.log("Fetching latest changes...");
35902
+ await execFilePromise("git", ["fetch", "origin"]);
35903
+ const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
35904
+ const currentBranch = branch.trim();
35905
+ console.log(`Current branch: ${currentBranch}`);
35906
+ console.log("Pulling latest changes...");
35907
+ const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
35908
+ console.log(pullOut);
35909
+ } else {
35910
+ console.log("(not a git checkout \u2014 skipping git fetch/pull and going straight to image pull)");
35911
+ }
35653
35912
  console.log(`
35654
35913
  Updating Docker images...`);
35655
35914
  const code = await runCompose(["pull"]);
@@ -57,17 +57,6 @@ export function getCheckupEntry(code: string): CheckupDictionaryEntry | null {
57
57
  return dictionaryByCode.get(code.toUpperCase()) ?? null;
58
58
  }
59
59
 
60
- /**
61
- * Get the title for a checkup code.
62
- *
63
- * @param code - The check code (e.g., "A001", "H002")
64
- * @returns The title or the code itself if not found
65
- */
66
- export function getCheckupTitle(code: string): string {
67
- const entry = getCheckupEntry(code);
68
- return entry?.title ?? code;
69
- }
70
-
71
60
  /**
72
61
  * Check if a code exists in the dictionary.
73
62
  *
package/lib/checkup.ts CHANGED
@@ -2,41 +2,41 @@
2
2
  * Express Checkup Module
3
3
  * ======================
4
4
  * Generates JSON health check reports directly from PostgreSQL without Prometheus.
5
- *
5
+ *
6
6
  * ARCHITECTURAL DECISIONS
7
7
  * -----------------------
8
- *
8
+ *
9
9
  * 1. SINGLE SOURCE OF TRUTH FOR SQL QUERIES
10
- * Complex metrics (index health, settings, db_stats) are loaded from
10
+ * Complex metrics (index health, settings, db_stats) are loaded from
11
11
  * config/pgwatch-prometheus/metrics.yml via getMetricSql() from metrics-loader.ts.
12
- *
12
+ *
13
13
  * Simple queries (version, database list, connection states, uptime) use
14
14
  * inline SQL as they're trivial and CLI-specific.
15
- *
15
+ *
16
16
  * 2. JSON SCHEMA COMPLIANCE
17
17
  * All generated reports MUST comply with JSON schemas in reporter/schemas/.
18
18
  * These schemas define the expected format for both:
19
19
  * - Full-fledged monitoring reporter output
20
20
  * - Express checkup output
21
- *
21
+ *
22
22
  * Before adding or modifying a report, verify the corresponding schema exists
23
23
  * and ensure the output matches. Run schema validation tests to confirm.
24
- *
24
+ *
25
25
  * 3. ERROR HANDLING STRATEGY
26
26
  * Functions follow two patterns based on criticality:
27
- *
27
+ *
28
28
  * PROPAGATING (throws on error):
29
29
  * - Core data functions: getPostgresVersion, getSettings, getAlteredSettings,
30
30
  * getDatabaseSizes, getInvalidIndexes, getUnusedIndexes, getRedundantIndexes
31
31
  * - If these fail, the entire report should fail (data is required)
32
32
  * - Callers should handle errors at the report generation level
33
- *
33
+ *
34
34
  * GRACEFUL DEGRADATION (catches errors, includes error in output):
35
35
  * - Optional/supplementary queries: pg_stat_statements, pg_stat_kcache checks,
36
36
  * memory calculations, postmaster startup time
37
37
  * - These are nice-to-have; missing data shouldn't fail the whole report
38
38
  * - Errors are logged and included in report output for visibility
39
- *
39
+ *
40
40
  * ADDING NEW REPORTS
41
41
  * ------------------
42
42
  * 1. Add/verify the metric exists in config/pgwatch-prometheus/metrics.yml
@@ -51,7 +51,7 @@ import * as fs from "fs";
51
51
  import * as path from "path";
52
52
  import * as pkg from "../package.json";
53
53
  import { getMetricSql, transformMetricRow, METRIC_NAMES } from "./metrics-loader";
54
- import { getCheckupTitle, buildCheckInfoMap } from "./checkup-dictionary";
54
+ import { buildCheckInfoMap } from "./checkup-dictionary";
55
55
 
56
56
  // Time constants
57
57
  const SECONDS_PER_DAY = 86400;
@@ -336,7 +336,7 @@ export function parseVersionNum(versionNum: string): { major: string; minor: str
336
336
  /**
337
337
  * Format bytes to human readable string using binary units (1024-based).
338
338
  * Uses IEC standard: KiB, MiB, GiB, etc.
339
- *
339
+ *
340
340
  * Note: PostgreSQL's pg_size_pretty() uses kB/MB/GB with 1024 base (technically
341
341
  * incorrect SI usage), but we follow IEC binary units per project style guide.
342
342
  */
@@ -387,7 +387,7 @@ function formatSettingPrettyValue(
387
387
  /**
388
388
  * Get PostgreSQL version information.
389
389
  * Uses simple inline SQL (trivial query, CLI-specific).
390
- *
390
+ *
391
391
  * @throws {Error} If database query fails (propagating - critical data)
392
392
  */
393
393
  export async function getPostgresVersion(client: Client): Promise<PostgresVersion> {
@@ -1084,7 +1084,7 @@ export const generateH004 = (client: Client, nodeName = "node-01") =>
1084
1084
 
1085
1085
  /**
1086
1086
  * Generate D004 report - pg_stat_statements and pg_stat_kcache settings.
1087
- *
1087
+ *
1088
1088
  * Uses graceful degradation: extension queries are wrapped in try-catch
1089
1089
  * because extensions may not be installed. Errors are included in the
1090
1090
  * report output rather than failing the entire report.
package/lib/init.ts CHANGED
@@ -87,7 +87,7 @@ export type AdminConnection = {
87
87
  /**
88
88
  * Check if an error indicates SSL negotiation failed and fallback to non-SSL should be attempted.
89
89
  * This mimics libpq's sslmode=prefer behavior.
90
- *
90
+ *
91
91
  * IMPORTANT: This should NOT match certificate errors (expired, invalid, self-signed)
92
92
  * as those are real errors the user needs to fix, not negotiation failures.
93
93
  */
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Metrics loader for express checkup reports
3
- *
3
+ *
4
4
  * Loads SQL queries from embedded metrics data (generated from metrics.yml at build time).
5
5
  * Provides version-aware query selection and row transformation utilities.
6
6
  */
@@ -9,7 +9,7 @@ import { METRICS, MetricDefinition } from "./metrics-embedded";
9
9
 
10
10
  /**
11
11
  * Get SQL query for a specific metric, selecting the appropriate version.
12
- *
12
+ *
13
13
  * @param metricName - Name of the metric (e.g., "settings", "db_stats")
14
14
  * @param pgMajorVersion - PostgreSQL major version (default: 16)
15
15
  * @returns SQL query string
@@ -41,7 +41,7 @@ export function getMetricSql(metricName: string, pgMajorVersion: number = 16): s
41
41
 
42
42
  /**
43
43
  * Get metric definition including all metadata.
44
- *
44
+ *
45
45
  * @param metricName - Name of the metric
46
46
  * @returns MetricDefinition or undefined if not found
47
47
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "postgresai",
3
- "version": "0.15.0-rc.4",
3
+ "version": "0.15.0-rc.5",
4
4
  "description": "postgres_ai CLI",
5
5
  "license": "Apache-2.0",
6
6
  "private": false,
@@ -420,3 +420,126 @@ describe("upgrade CLI commands", () => {
420
420
  expect(stdout).toMatch(/health/i);
421
421
  }, { timeout: TEST_TIMEOUT });
422
422
  });
423
+
424
describe("in-place upgrade env migration (mon update / update-config)", () => {
  /**
   * Regression coverage for the 0.14 -> 0.15 in-place upgrade gap (#203).
   *
   * A 0.14 install upgraded through the documented flow (`pgai mon update`)
   * kept a .env without VM_AUTH_USERNAME / VM_AUTH_PASSWORD, and
   * sink-prometheus then exited with:
   *
   *   fatal cannot read "/postgres_ai_configs/prometheus/prometheus.yml":
   *   cannot expand environment variables: missing "VM_AUTH_USERNAME" env var
   *
   * Both `mon update` and `mon update-config` are expected to migrate .env
   * additively before they do anything else.
   */

  let sandboxRoot: string;

  /** Create a minimal project directory under the sandbox with the given .env body. */
  const seedProject = (dirName: string, envBody: string): string => {
    const projectDir = resolve(sandboxRoot, dirName);
    fs.mkdirSync(projectDir, { recursive: true });
    fs.writeFileSync(resolve(projectDir, ".env"), envBody);
    fs.writeFileSync(resolve(projectDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
    fs.writeFileSync(resolve(projectDir, "instances.yml"), "# instances\n");
    return projectDir;
  };

  /** Read back the project's .env after the CLI ran. */
  const readEnv = (projectDir: string): string => fs.readFileSync(resolve(projectDir, ".env"), "utf8");

  beforeAll(() => {
    sandboxRoot = fs.mkdtempSync(resolve(os.tmpdir(), "pgai-upgrade-env-migration-"));
  });

  afterAll(() => {
    if (sandboxRoot && fs.existsSync(sandboxRoot)) {
      fs.rmSync(sandboxRoot, { recursive: true, force: true });
    }
  });

  test("mon update-config appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    // A 0.14-style .env carries PGAI_TAG but none of the VM_AUTH_* keys.
    const projectDir = seedProject(
      "update-config-0.14-env",
      "PGAI_TAG=0.14.0\nGF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw\n",
    );

    // The compose step is expected to fail without Docker; the migration happens before it.
    runCliInDir(["mon", "update-config"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    // Pre-existing entries stay exactly as written.
    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^GF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw$/m);

    // Newly required keys show up: fixed username plus a non-empty base64 password.
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // REPLICATOR_PASSWORD predates 0.15 but belongs to the same contract.
    expect(migrated).toMatch(/^REPLICATOR_PASSWORD=[a-f0-9]{64}$/m);
  }, { timeout: TEST_TIMEOUT });

  test("mon update appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    const projectDir = seedProject("update-0.14-env", "PGAI_TAG=0.14.0\n");

    // No Docker and no git checkout here, so the command fails later; the migration still runs first.
    const outcome = runCliInDir(["mon", "update"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // The user must be told which keys were added.
    expect(outcome.stdout).toMatch(/Added missing \.env keys/);
    expect(outcome.stdout).toMatch(/VM_AUTH_USERNAME/);
    expect(outcome.stdout).toMatch(/VM_AUTH_PASSWORD/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update preserves existing VM_AUTH_* values (no rotation)", () => {
    // VM auth is already configured by the user (e.g. via rotate-vm-auth.sh).
    const projectDir = seedProject(
      "update-preserve-vm-auth",
      "PGAI_TAG=0.15.0\nVM_AUTH_USERNAME=custom-user\nVM_AUTH_PASSWORD=custom-pw-do-not-rotate\nREPLICATOR_PASSWORD=" +
        "a".repeat(64) +
        "\n",
    );

    const outcome = runCliInDir(["mon", "update"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^VM_AUTH_USERNAME=custom-user$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=custom-pw-do-not-rotate$/m);
    expect(migrated).toMatch(/^REPLICATOR_PASSWORD=a{64}$/m);

    // With nothing to add, the command reports that .env is current.
    expect(outcome.stdout).toMatch(/\.env is up to date/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update-config handles a .env that doesn't end with a newline", () => {
    // Without a terminating newline, a naive append would yield
    // `PGAI_TAG=0.14.0VM_AUTH_USERNAME=vmauth`; the migration has to insert one.
    const projectDir = seedProject("update-config-no-trailing-newline", "PGAI_TAG=0.14.0");

    runCliInDir(["mon", "update-config"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    // The first appended key must start on its own line.
    expect(migrated).not.toMatch(/PGAI_TAG=0\.14\.0VM_AUTH_USERNAME/);
  }, { timeout: TEST_TIMEOUT });
});