postgresai 0.15.0-rc.4 → 0.15.0-rc.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,71 @@ function stripMatchingQuotes(value: string): string {
74
74
  return trimmed;
75
75
  }
76
76
 
77
/**
 * One required `.env` key for the monitoring stack, plus how to mint a value
 * for it when the key is missing.
 */
type EnvKeyDefault = {
  /** Environment variable name as it appears in `.env`. */
  key: string;
  /** Factory producing the value to write when the key is missing. */
  defaultValue: () => string;
  /** CLI version that introduced the key - used in human-readable migration logs. */
  introducedIn: string;
};

/**
 * Required env vars contract for the monitoring stack.
 *
 * Keys listed here are required by the docker-compose stack and must exist in
 * `.env` for the stack to start cleanly. This is the spine of the in-place
 * upgrade story: when a user upgrades from a version that didn't require a key
 * (e.g. 0.14, pre-VM-auth) to one that does (0.15), `ensureRequiredEnvVars`
 * appends what's missing so the next `docker compose up` doesn't fail with
 * `missing "<KEY>" env var`.
 */
const REQUIRED_ENV_KEYS: readonly EnvKeyDefault[] = [
  { key: "REPLICATOR_PASSWORD", defaultValue: () => crypto.randomBytes(32).toString("hex"), introducedIn: "0.13" },
  { key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
  { key: "VM_AUTH_PASSWORD", defaultValue: () => crypto.randomBytes(18).toString("base64"), introducedIn: "0.15" },
];

/**
 * Read `<projectDir>/.env` (if present), append any required keys that are
 * missing, and return the list of keys that were added.
 *
 * Purely additive: existing lines are kept verbatim and new `KEY=value` lines
 * are appended at the end, so user ordering and comments survive. The file is
 * rewritten in place (not via temp file + rename) and, whenever it is written,
 * its permissions are tightened to 0600 because it holds generated secrets.
 *
 * Idempotent: a second call is a no-op once all keys are present, and the file
 * is not touched at all in that case.
 *
 * Used by `mon update` and `mon update-config` so the in-place upgrade path
 * picks up newly-required env vars instead of failing at container boot.
 *
 * @param projectDir Directory that contains (or should contain) the `.env` file.
 * @returns Names of the keys that were appended, in `REQUIRED_ENV_KEYS` order;
 *   empty when nothing had to be added.
 * @throws Any error raised by the synchronous `fs` read/write calls
 *   (e.g. missing directory, permission denied).
 */
function ensureRequiredEnvVars(projectDir: string): string[] {
  const envFile = path.resolve(projectDir, ".env");
  const existing = fs.existsSync(envFile) ? fs.readFileSync(envFile, "utf8") : "";

  // Collect the keys that are already defined. Matching is deliberately a bit
  // more tolerant than a bare `^KEY=` test: a key the user wrote with leading
  // whitespace, an `export ` prefix, or spaces before `=` must not be appended
  // a second time, because a later duplicate line would shadow their value.
  const definedKeys = new Set<string>();
  for (const rawLine of existing.split(/\r\n|\n|\r/)) {
    const line = rawLine.trimStart();
    if (line === "" || line.startsWith("#")) continue; // blank line or comment
    const eq = line.indexOf("=");
    if (eq <= 0) continue; // not a KEY=value assignment
    definedKeys.add(line.slice(0, eq).replace(/^export\s+/, "").trim());
  }

  const added: string[] = [];
  const appendLines: string[] = [];
  for (const spec of REQUIRED_ENV_KEYS) {
    if (definedKeys.has(spec.key)) continue;
    appendLines.push(`${spec.key}=${spec.defaultValue()}`);
    added.push(spec.key);
  }

  if (appendLines.length === 0) {
    return added;
  }

  // Append (don't overwrite) so we preserve order and any comments the user
  // may have added to their .env. Make sure we have a trailing newline first.
  const needsTrailingNewline = existing.length > 0 && !existing.endsWith("\n");
  const newContent = existing + (needsTrailingNewline ? "\n" : "") + appendLines.join("\n") + "\n";
  fs.writeFileSync(envFile, newContent, { encoding: "utf8", mode: 0o600 });

  // `mode` above only applies when the file is newly created; a pre-existing
  // .env keeps whatever permissions it had. Tighten explicitly, best-effort:
  // a chmod failure (e.g. file owned by another user) must not fail the
  // migration after the keys were already written.
  try {
    fs.chmodSync(envFile, 0o600);
  } catch {
    // Intentionally ignored - see comment above.
  }

  return added;
}
141
+
77
142
  // Helper functions for spawning processes - use Node.js child_process for compatibility
78
143
  async function execFilePromise(file: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
79
144
  return new Promise((resolve, reject) => {
@@ -2200,6 +2265,18 @@ function updatePgwatchConfig(configPath: string, updates: Record<string, string>
2200
2265
  fs.writeFileSync(configPath, lines.join("\n") + "\n", { encoding: "utf8", mode: 0o600 });
2201
2266
  }
2202
2267
 
2268
/**
 * Make a change to the monitoring targets take effect without a full
 * `mon restart`: first re-run the `sources-generator` compose service, then
 * force-recreate the two pgwatch collector services.
 *
 * @returns The exit code of the first compose invocation that failed, or the
 *   exit code of the collector recreate step when generation succeeded.
 */
async function applyMonitoringTargetsConfig(): Promise<number> {
  console.log("Applying monitoring target configuration...");

  const sourcesExitCode = await runCompose(["run", "--rm", "sources-generator"]);
  if (sourcesExitCode === 0) {
    // Sources regenerated; the collectors are recreated so they pick them up.
    console.log("Restarting pgwatch collectors to pick up target changes...");
    const collectorServices = ["pgwatch-prometheus", "pgwatch-postgres"];
    return runCompose(["up", "-d", "--force-recreate", ...collectorServices]);
  }

  // Generation failed: surface its exit code and leave the collectors alone.
  return sourcesExitCode;
}
2279
+
2203
2280
  /**
2204
2281
  * Run docker compose command
2205
2282
  */
@@ -2970,41 +3047,83 @@ mon
2970
3047
  });
2971
3048
  mon
2972
3049
  .command("update-config")
2973
- .description("apply monitoring services configuration (generate sources)")
3050
+ .description("apply monitoring services configuration (generate sources, migrate .env)")
2974
3051
  .action(async () => {
3052
+ let projectDir: string;
3053
+ try {
3054
+ ({ projectDir } = await resolveOrInitPaths());
3055
+ } catch (error) {
3056
+ const message = error instanceof Error ? error.message : String(error);
3057
+ console.error(message);
3058
+ process.exitCode = 1;
3059
+ return;
3060
+ }
3061
+
3062
+ // Migrate .env first: append any required keys introduced by newer stack
3063
+ // versions (e.g. VM_AUTH_* added in 0.15). This is what makes in-place
3064
+ // upgrades from older deployments not break with `missing "VM_AUTH_USERNAME"
3065
+ // env var` when sink-prometheus boots.
3066
+ const added = ensureRequiredEnvVars(projectDir);
3067
+ if (added.length > 0) {
3068
+ console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
3069
+ console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
3070
+ }
3071
+
2975
3072
  const code = await runCompose(["run", "--rm", "sources-generator"]);
2976
3073
  if (code !== 0) process.exitCode = code;
2977
3074
  });
2978
3075
  mon
2979
3076
  .command("update")
2980
- .description("update monitoring stack")
3077
+ .description("update monitoring stack (migrate .env, pull images)")
2981
3078
  .action(async () => {
2982
3079
  console.log("Updating PostgresAI monitoring stack...\n");
2983
3080
 
2984
3081
  try {
2985
- // Check if we're in a git repo
2986
- const gitDir = path.resolve(process.cwd(), ".git");
2987
- if (!fs.existsSync(gitDir)) {
2988
- console.error("Not a git repository. Cannot update.");
3082
+ let projectDir: string;
3083
+ try {
3084
+ ({ projectDir } = await resolveOrInitPaths());
3085
+ } catch (error) {
3086
+ const message = error instanceof Error ? error.message : String(error);
3087
+ console.error(message);
2989
3088
  process.exitCode = 1;
2990
3089
  return;
2991
3090
  }
2992
3091
 
2993
- // Fetch latest changes
2994
- console.log("Fetching latest changes...");
2995
- await execFilePromise("git", ["fetch", "origin"]);
2996
-
2997
- // Check current branch
2998
- const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
2999
- const currentBranch = branch.trim();
3000
- console.log(`Current branch: ${currentBranch}`);
3001
-
3002
- // Pull latest changes
3003
- console.log("Pulling latest changes...");
3004
- const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3005
- console.log(pullOut);
3092
+ // Step 1: migrate .env so newer stack versions that require additional
3093
+ // env vars (e.g. VM_AUTH_USERNAME / VM_AUTH_PASSWORD introduced in 0.15)
3094
+ // don't make `docker compose up` fail silently for users who installed
3095
+ // before those vars existed. Purely additive: existing values are kept.
3096
+ console.log("Checking .env for newly-required keys...");
3097
+ const added = ensureRequiredEnvVars(projectDir);
3098
+ if (added.length > 0) {
3099
+ console.log(`✓ Added missing .env keys: ${added.join(", ")}`);
3100
+ console.log(" (existing values preserved; missing keys filled with safe defaults)");
3101
+ } else {
3102
+ console.log(" .env is up to date");
3103
+ }
3104
+ console.log();
3105
+
3106
+ // Step 2: refresh repo if this is a git-based deployment. Some users
3107
+ // upgrade purely via `npm install -g postgresai@latest` and don't have a
3108
+ // git checkout - in that case we skip git operations and still do the
3109
+ // env migration + docker pull.
3110
+ const gitDir = path.resolve(projectDir, ".git");
3111
+ if (fs.existsSync(gitDir)) {
3112
+ console.log("Fetching latest changes...");
3113
+ await execFilePromise("git", ["fetch", "origin"]);
3114
+
3115
+ const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
3116
+ const currentBranch = branch.trim();
3117
+ console.log(`Current branch: ${currentBranch}`);
3118
+
3119
+ console.log("Pulling latest changes...");
3120
+ const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3121
+ console.log(pullOut);
3122
+ } else {
3123
+ console.log("(not a git checkout — skipping git fetch/pull and going straight to image pull)");
3124
+ }
3006
3125
 
3007
- // Update Docker images
3126
+ // Step 3: pull new images.
3008
3127
  console.log("\nUpdating Docker images...");
3009
3128
  const code = await runCompose(["pull"]);
3010
3129
 
@@ -3234,6 +3353,14 @@ targets
3234
3353
  try {
3235
3354
  addInstanceToFile(file, buildInstance(instanceName, connStr));
3236
3355
  console.log(`Monitoring target '${instanceName}' added`);
3356
+
3357
+ const applyCode = await applyMonitoringTargetsConfig();
3358
+ if (applyCode !== 0) {
3359
+ console.error("Monitoring target was saved, but applying the generated pgwatch sources failed. Run 'postgresai mon restart' to apply manually.");
3360
+ process.exitCode = 1;
3361
+ return;
3362
+ }
3363
+ console.log("✓ Monitoring target configuration applied");
3237
3364
  } catch (err) {
3238
3365
  // Surface InstancesParseError as-is so we don't silently overwrite a
3239
3366
  // corrupted file (which could discard several targets, including the
@@ -3262,6 +3389,14 @@ targets
3262
3389
  return;
3263
3390
  }
3264
3391
  console.log(`Monitoring target '${name}' removed`);
3392
+
3393
+ const applyCode = await applyMonitoringTargetsConfig();
3394
+ if (applyCode !== 0) {
3395
+ console.error("Monitoring target was removed, but applying the generated pgwatch sources failed. Run 'postgresai mon restart' to apply manually.");
3396
+ process.exitCode = 1;
3397
+ return;
3398
+ }
3399
+ console.log("✓ Monitoring target configuration applied");
3265
3400
  } catch (err) {
3266
3401
  const message = err instanceof Error ? err.message : String(err);
3267
3402
  console.error(`Error processing instances.yml: ${message}`);
@@ -13423,7 +13423,7 @@ var {
13423
13423
  // package.json
13424
13424
  var package_default = {
13425
13425
  name: "postgresai",
13426
- version: "0.15.0-rc.4",
13426
+ version: "0.15.0-rc.6",
13427
13427
  description: "postgres_ai CLI",
13428
13428
  license: "Apache-2.0",
13429
13429
  private: false,
@@ -16254,7 +16254,7 @@ var Result = import_lib.default.Result;
16254
16254
  var TypeOverrides = import_lib.default.TypeOverrides;
16255
16255
  var defaults = import_lib.default.defaults;
16256
16256
  // package.json
16257
- var version = "0.15.0-rc.4";
16257
+ var version = "0.15.0-rc.6";
16258
16258
  var package_default2 = {
16259
16259
  name: "postgresai",
16260
16260
  version,
@@ -27687,7 +27687,7 @@ where
27687
27687
  statement_timeout_seconds: 300
27688
27688
  },
27689
27689
  pg_invalid_indexes: {
27690
- description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact).",
27690
+ description: "This metric identifies invalid indexes in the database with decision tree data for remediation. It provides insights into whether to DROP (if duplicate exists), RECREATE (if backs constraint), or flag as UNCERTAIN (if additional RCA is needed to check query plans). Decision tree: 1) Valid duplicate exists -> DROP, 2) Backs PK/UNIQUE constraint -> RECREATE, 3) Table < 10K rows -> RECREATE (small tables rebuild quickly, typically under 1 second), 4) Otherwise -> UNCERTAIN (need query plan analysis to assess impact). Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks invalid indexes by `index_size_bytes desc` (ties broken by schema, table, then index name for stability), keeps the top 100, and folds the tail into a single `'$other$'` row whose `index_size_bytes` / `table_row_estimate` are summed and whose tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when all invalid indexes fit within the top-100 cap, so its absence on healthy clusters is normal.",
27691
27691
  sqls: {
27692
27692
  11: `with fk_indexes as ( /* pgwatch_generated */
27693
27693
  select
@@ -27755,25 +27755,65 @@ data as (
27755
27755
  left join valid_duplicates vd on vd.invalid_indexrelid = pidx.indexrelid
27756
27756
  where pidx.indisvalid = false
27757
27757
  ),
27758
- num_data as (
27758
+ ranked as (
27759
27759
  select
27760
- row_number() over () as num,
27760
+ row_number() over (
27761
+ order by index_size_bytes desc nulls last,
27762
+ tag_schema_name, tag_table_name, tag_index_name
27763
+ ) as num,
27761
27764
  data.*
27762
27765
  from data
27763
27766
  )
27764
27767
  select
27765
27768
  (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
27766
27769
  current_database() as tag_datname,
27767
- num_data.*
27768
- from num_data
27769
- limit 1000;
27770
+ num,
27771
+ tag_index_name,
27772
+ tag_schema_name,
27773
+ tag_table_name,
27774
+ tag_relation_name,
27775
+ index_definition,
27776
+ index_size_bytes,
27777
+ is_pk,
27778
+ is_unique,
27779
+ constraint_name,
27780
+ table_row_estimate,
27781
+ has_valid_duplicate,
27782
+ valid_index_name,
27783
+ valid_index_definition,
27784
+ supports_fk
27785
+ from ranked
27786
+ where num <= 100
27787
+ union all
27788
+ select
27789
+ (extract(epoch from now()) * 1e9)::int8 as epoch_ns,
27790
+ current_database() as tag_datname,
27791
+ 0::bigint as num,
27792
+ '$other$'::text as tag_index_name,
27793
+ '$other$'::text as tag_schema_name,
27794
+ '$other$'::text as tag_table_name,
27795
+ '$other$'::text as tag_relation_name,
27796
+ '$other$'::text as index_definition,
27797
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
27798
+ false as is_pk,
27799
+ false as is_unique,
27800
+ '$other$'::text as constraint_name,
27801
+ coalesce(sum(table_row_estimate), 0)::bigint as table_row_estimate,
27802
+ bool_or(has_valid_duplicate) as has_valid_duplicate,
27803
+ '$other$'::text as valid_index_name,
27804
+ '$other$'::text as valid_index_definition,
27805
+ coalesce(max(supports_fk), 0)::int as supports_fk
27806
+ from ranked
27807
+ where num > 100
27808
+ group by ()
27809
+ having count(*) > 0;
27770
27810
  `
27771
27811
  },
27772
27812
  gauges: ["*"],
27773
27813
  statement_timeout_seconds: 15
27774
27814
  },
27775
27815
  unused_indexes: {
27776
- description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance.",
27816
+ description: "This metric identifies unused indexes in the database. It provides insights into the number of unused indexes and their details. This metric helps administrators identify and fix unused indexes to improve database performance. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: within the `idx_scan = 0 AND idx_is_btree` filter, ranks indexes by `index_size_bytes desc` (ties broken by schema, table, index name), keeps the top 100, and folds the tail into a single `'$other$'` row. Counter columns (`idx_scan`, `all_scans`, `writes`, `index_size_bytes`, `table_size_bytes`, `relpages`) are summed across the tail; ratio columns (`index_scan_pct`, `scans_per_write`) and the `supports_fk` boolean are deliberately zeroed/false on the aggregate row because the tail-level average would mislead and the per-row FK relationship has no meaningful aggregate. Tag columns carry the literal `'$other$'` sentinel. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when ≤100 indexes match the unused filter.",
27777
27817
  sqls: {
27778
27818
  11: `with fk_indexes as ( /* pgwatch_generated */
27779
27819
  select
@@ -27852,6 +27892,17 @@ limit 1000;
27852
27892
  from indexes i
27853
27893
  join table_scans ts on ts.relid = i.indrelid
27854
27894
  )
27895
+ , ranked as (
27896
+ select
27897
+ row_number() over (
27898
+ order by index_size_bytes desc nulls last,
27899
+ schema_name, table_name, index_name
27900
+ ) as num,
27901
+ *
27902
+ from index_ratios
27903
+ where idx_scan = 0
27904
+ and idx_is_btree
27905
+ )
27855
27906
  select
27856
27907
  'Never Used Indexes' as tag_reason,
27857
27908
  current_database() as tag_datname,
@@ -27871,19 +27922,39 @@ select
27871
27922
  idx_is_btree,
27872
27923
  opclasses as tag_opclasses,
27873
27924
  supports_fk
27874
- from index_ratios
27875
- where
27876
- idx_scan = 0
27877
- and idx_is_btree
27878
- order by index_size_bytes desc
27879
- limit 1000;
27925
+ from ranked
27926
+ where num <= 100
27927
+ union all
27928
+ select
27929
+ 'Never Used Indexes' as tag_reason,
27930
+ current_database() as tag_datname,
27931
+ 0::oid as index_id,
27932
+ '$other$'::text as tag_schema_name,
27933
+ '$other$'::text as tag_table_name,
27934
+ '$other$'::text as tag_index_name,
27935
+ '$other$'::text as index_definition,
27936
+ coalesce(sum(idx_scan), 0)::int8 as idx_scan,
27937
+ coalesce(sum(all_scans), 0)::int8 as all_scans,
27938
+ 0::numeric as index_scan_pct,
27939
+ coalesce(sum(writes), 0)::int8 as writes,
27940
+ 0::numeric as scans_per_write,
27941
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
27942
+ coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
27943
+ coalesce(sum(relpages), 0)::int4 as relpages,
27944
+ true as idx_is_btree,
27945
+ '$other$'::text as tag_opclasses,
27946
+ false as supports_fk
27947
+ from ranked
27948
+ where num > 100
27949
+ group by ()
27950
+ having count(*) > 0;
27880
27951
  `
27881
27952
  },
27882
27953
  gauges: ["*"],
27883
27954
  statement_timeout_seconds: 15
27884
27955
  },
27885
27956
  redundant_indexes: {
27886
- description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds.",
27957
+ description: "This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. It analyzes index relationships and finds indexes that are covered by other indexes, considering column order, operator classes, and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds. Adapts the top-N + `'$other$'` bucket pattern from !262 to this metric: ranks redundant indexes by `index_size_bytes desc` (ties broken by `table_name`), keeps the top 100, and folds the tail into a single `'$other$'` row whose `table_size_bytes`, `index_size_bytes` and `index_usage` columns are summed and whose tag columns carry the literal `'$other$'` sentinel. The `redundant_indexes_grouped` CTE intentionally preserves duplicate column aliases (`tag_schema_name` / `tag_index_name` appear twice — once from the raw name and once from the `formated_*` variant) because the dashboards rely on both spellings; the duplication is preserved on the `'$other$'` row for consistency. The `'$other$'` row is omitted entirely (via `HAVING count(*) > 0`) when there are ≤100 redundant pairs, so its absence on healthy clusters is normal.",
27887
27958
  sqls: {
27888
27959
  11: `with fk_indexes as ( /* pgwatch_generated */
27889
27960
  select
@@ -28035,9 +28106,43 @@ redundant_indexes_tmp_num as (
28035
28106
  formated_relation_name,
28036
28107
  supports_fk
28037
28108
  order by index_size_bytes desc
28109
+ ),
28110
+ -- redundant_indexes_grouped intentionally exposes duplicate aliases
28111
+ -- (tag_schema_name / tag_index_name appear twice — once from the
28112
+ -- raw name and once from the formated_* variant). select * over it
28113
+ -- preserves both. Order by table_name (unique, non-duplicated).
28114
+ ranked as (
28115
+ select
28116
+ row_number() over (
28117
+ order by index_size_bytes desc nulls last, table_name
28118
+ ) as num,
28119
+ redundant_indexes_grouped.*
28120
+ from redundant_indexes_grouped
28038
28121
  )
28039
- select * from redundant_indexes_grouped
28040
- limit 1000;
28122
+ select * from ranked where num <= 100
28123
+ union all
28124
+ select
28125
+ 0::bigint as num,
28126
+ 0::oid as index_id,
28127
+ '$other$'::text as tag_schema_name,
28128
+ '$other$'::text as table_name,
28129
+ coalesce(sum(table_size_bytes), 0)::int8 as table_size_bytes,
28130
+ '$other$'::text as tag_index_name,
28131
+ '$other$'::text as tag_access_method,
28132
+ '$other$'::text as tag_reason,
28133
+ coalesce(sum(index_size_bytes), 0)::int8 as index_size_bytes,
28134
+ coalesce(sum(index_usage), 0)::int8 as index_usage,
28135
+ '$other$'::text as index_definition,
28136
+ '$other$'::text as tag_index_name,
28137
+ '$other$'::text as tag_schema_name,
28138
+ '$other$'::text as tag_table_name,
28139
+ '$other$'::text as tag_relation_name,
28140
+ coalesce(max(supports_fk), 0)::int as supports_fk,
28141
+ '$other$'::text as redundant_to_json
28142
+ from ranked
28143
+ where num > 100
28144
+ group by ()
28145
+ having count(*) > 0;
28041
28146
  `
28042
28147
  },
28043
28148
  gauges: ["*"],
@@ -28059,93 +28164,139 @@ where datname = current_database()
28059
28164
  statement_timeout_seconds: 15
28060
28165
  },
28061
28166
  pg_table_bloat: {
28062
- description: "This metric analyzes estimated table bloat by calculating the estimated vs actual table pages and sizes. It provides insights into estimated bloat percentage, real size, extra size due to estimated bloat, and estimated bloat size considering fill factor. This metric helps administrators identify tables that may need maintenance like VACUUM FULL or table reorganization.",
28167
+ description: "Estimated per-table bloat (heap pages allocated vs heap pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262: everything below the cap is summed into a single `'$other$'` row so dashboard \"total bloat across the DB\" stays correct even when the tail is large. Ranks by `bloat_pct` descending (most-bloated tables first), with `is_na = 0` preferred (don't crowd top-N with tables whose estimate is unreliable) and stable schemaname/tblname tiebreakers. Preserves the existing >1 MiB filter (zero-byte and tiny tables aren't interesting for bloat). Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size (total wasted bytes in the tail); recompute extra_pct and bloat_pct from the summed numerator/denominator (weighted-avg effectively); avg(fillfactor); max(is_na) (any tail row with bad stats taints the aggregate). The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
28063
28168
  sqls: {
28064
- 11: `select current_database() as tag_datname, schemaname as tag_schemaname, tblname as tag_tblname, (bs*tblpages)/(1024*1024)::float as real_size_mib, /* pgwatch_generated */
28065
- (tblpages-est_tblpages)*bs as extra_size,
28066
- case when tblpages > 0 and tblpages - est_tblpages > 0
28067
- then 100 * (tblpages - est_tblpages)/tblpages::float
28068
- else 0
28069
- end as extra_pct, fillfactor,
28070
- case when tblpages - est_tblpages_ff > 0
28071
- then (tblpages-est_tblpages_ff)*bs
28072
- else 0
28073
- end as bloat_size,
28074
- case when tblpages > 0 and tblpages - est_tblpages_ff > 0
28075
- then 100 * (tblpages - est_tblpages_ff)/tblpages::float
28076
- else 0
28077
- end as bloat_pct, is_na
28078
- -- , tpl_hdr_size, tpl_data_size, (pst).free_percent + (pst).dead_tuple_percent as real_frag -- (DEBUG INFO)
28079
- from (
28080
- select ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages,
28081
- ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff,
28082
- tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
28083
- -- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) as pst -- (DEBUG INFO)
28169
+ 11: `with bloat_data as ( /* pgwatch_generated */
28170
+ select schemaname, tblname,
28171
+ (bs*tblpages)/(1024*1024)::float as real_size_mib,
28172
+ (tblpages-est_tblpages)*bs as extra_size,
28173
+ case when tblpages > 0 and tblpages - est_tblpages > 0
28174
+ then 100 * (tblpages - est_tblpages)/tblpages::float
28175
+ else 0
28176
+ end as extra_pct,
28177
+ fillfactor,
28178
+ case when tblpages - est_tblpages_ff > 0
28179
+ then (tblpages-est_tblpages_ff)*bs
28180
+ else 0
28181
+ end as bloat_size,
28182
+ case when tblpages > 0 and tblpages - est_tblpages_ff > 0
28183
+ then 100 * (tblpages - est_tblpages_ff)/tblpages::float
28184
+ else 0
28185
+ end as bloat_pct,
28186
+ is_na,
28187
+ -- carried for the $other$ aggregate denominators
28188
+ bs, tblpages, est_tblpages, est_tblpages_ff
28084
28189
  from (
28085
- select
28086
- ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
28087
- - case when tpl_hdr_size%ma = 0 then ma else tpl_hdr_size%ma end
28088
- - case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
28089
- ) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
28090
- toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
28091
- -- , tpl_hdr_size, tpl_data_size
28190
+ select ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages,
28191
+ ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff,
28192
+ tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
28092
28193
  from (
28093
28194
  select
28094
- tbl.oid as tblid, ns.nspname as schemaname, tbl.relname as tblname, tbl.reltuples,
28095
- tbl.relpages as heappages, coalesce(toast.relpages, 0) as toastpages,
28096
- coalesce(toast.reltuples, 0) as toasttuples,
28097
- coalesce(substring(
28098
- array_to_string(tbl.reloptions, ' ')
28099
- from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor,
28100
- current_setting('block_size')::numeric as bs,
28101
- case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma,
28102
- 24 as page_hdr,
28103
- 23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end
28104
- + case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size,
28105
- sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size,
28106
- (bool_or(att.atttypid = 'pg_catalog.name'::regtype)
28107
- or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na
28108
- from pg_attribute as att
28109
- join pg_class as tbl on att.attrelid = tbl.oid
28110
- join pg_namespace as ns on ns.oid = tbl.relnamespace
28111
- left join postgres_ai.pg_statistic as s on s.schemaname=ns.nspname
28112
- and s.tablename = tbl.relname and s.inherited=false and s.attname=att.attname
28113
- left join pg_class as toast on tbl.reltoastrelid = toast.oid
28114
- where not att.attisdropped
28115
- and tbl.relkind in ('r','m')
28116
- group by 1,2,3,4,5,6,7,8,9,10
28117
- order by 2,3
28118
- ) as s
28119
- ) as s2
28120
- ) as s3
28121
- -- where not is_na
28122
- -- and tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1
28123
- where (bs * tblpages::float / (1024 * 1024)) > 1 /* exclude tables below 1 MiB */
28124
- order by is_na = 0 desc, bloat_pct desc
28125
- limit 1000
28195
+ ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
28196
+ - case when tpl_hdr_size%ma = 0 then ma else tpl_hdr_size%ma end
28197
+ - case when ceil(tpl_data_size)::int%ma = 0 then ma else ceil(tpl_data_size)::int%ma end
28198
+ ) as tpl_size, bs - page_hdr as size_per_block, (heappages + toastpages) as tblpages, heappages,
28199
+ toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
28200
+ from (
28201
+ select
28202
+ tbl.oid as tblid, ns.nspname as schemaname, tbl.relname as tblname, tbl.reltuples,
28203
+ tbl.relpages as heappages, coalesce(toast.relpages, 0) as toastpages,
28204
+ coalesce(toast.reltuples, 0) as toasttuples,
28205
+ coalesce(substring(
28206
+ array_to_string(tbl.reloptions, ' ')
28207
+ from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor,
28208
+ current_setting('block_size')::numeric as bs,
28209
+ case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma,
28210
+ 24 as page_hdr,
28211
+ 23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end
28212
+ + case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size,
28213
+ sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size,
28214
+ (bool_or(att.atttypid = 'pg_catalog.name'::regtype)
28215
+ or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na
28216
+ from pg_attribute as att
28217
+ join pg_class as tbl on att.attrelid = tbl.oid
28218
+ join pg_namespace as ns on ns.oid = tbl.relnamespace
28219
+ left join postgres_ai.pg_statistic as s on s.schemaname=ns.nspname
28220
+ and s.tablename = tbl.relname and s.inherited=false and s.attname=att.attname
28221
+ left join pg_class as toast on tbl.reltoastrelid = toast.oid
28222
+ where not att.attisdropped
28223
+ and tbl.relkind in ('r','m')
28224
+ group by 1,2,3,4,5,6,7,8,9,10
28225
+ order by 2,3
28226
+ ) as s
28227
+ ) as s2
28228
+ ) as s3
28229
+ where (bs * tblpages::float / (1024 * 1024)) > 1 /* exclude tables below 1 MiB */
28230
+ ),
28231
+ ranked as (
28232
+ select
28233
+ row_number() over (
28234
+ order by is_na = 0 desc, bloat_pct desc nulls last,
28235
+ schemaname, tblname
28236
+ ) as rownum,
28237
+ *
28238
+ from bloat_data
28239
+ )
28240
+ select
28241
+ current_database() as tag_datname,
28242
+ schemaname as tag_schemaname,
28243
+ tblname as tag_tblname,
28244
+ real_size_mib,
28245
+ extra_size,
28246
+ extra_pct,
28247
+ fillfactor,
28248
+ bloat_size,
28249
+ bloat_pct,
28250
+ is_na
28251
+ from ranked
28252
+ where rownum <= 100
28253
+ union all
28254
+ select
28255
+ current_database() as tag_datname,
28256
+ '$other$'::text as tag_schemaname,
28257
+ '$other$'::text as tag_tblname,
28258
+ coalesce(sum(real_size_mib), 0)::float as real_size_mib,
28259
+ coalesce(sum(extra_size), 0)::int8 as extra_size,
28260
+ case when sum(tblpages) > 0
28261
+ then 100 * sum(greatest(tblpages - est_tblpages, 0))::float / sum(tblpages)
28262
+ else 0
28263
+ end::float as extra_pct,
28264
+ coalesce(avg(fillfactor), 100)::smallint as fillfactor,
28265
+ coalesce(sum(bloat_size), 0)::int8 as bloat_size,
28266
+ case when sum(tblpages) > 0
28267
+ then 100 * sum(greatest(tblpages - est_tblpages_ff, 0))::float / sum(tblpages)
28268
+ else 0
28269
+ end::float as bloat_pct,
28270
+ coalesce(max(is_na), 0)::int as is_na
28271
+ from ranked
28272
+ where rownum > 100
28273
+ group by ()
28274
+ having count(*) > 0
28126
28275
  `
28127
28276
  },
28128
28277
  gauges: ["real_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
28129
28278
  statement_timeout_seconds: 300
28130
28279
  },
28131
28280
  pg_btree_bloat: {
28132
- description: "This metric analyzes estimated index bloat by calculating the estimated vs actual index pages and sizes. It provides insights into estimated bloat percentage, real size, extra size due to estimated bloat, and estimated bloat size considering fill factor. This metric helps administrators identify indexes that may need maintenance like VACUUM FULL or index reorganization.",
28281
+ description: "Estimated per-btree-index bloat (index pages allocated vs index pages needed at perfect packing), bounded to the top 100 per database. Adapts the top-N + `'$other$'` bucket pattern from !262. Ranks by `bloat_pct` descending with `is_na = 0` preferred and stable schema/table/idx tiebreakers. Preserves the existing >1 MiB filter. Aggregate semantics on the `'$other$'` row: sum for real_size_mib / extra_size / bloat_size; recompute extra_pct and bloat_pct from sum(relpages-est_pages) / sum(relpages) (weighted avg over the tail); avg(fillfactor); max(is_na); table_size_mib doesn't aggregate meaningfully across indexes on different tables, so the `'$other$'` row reports 0. The `'$other$'` sentinel cannot collide with a real Postgres identifier.",
28133
28282
  sqls: {
28134
- 11: `select /* pgwatch_generated */
28135
- current_database() as tag_datname, nspname as tag_schemaname, tblname as tag_tblname, idxname as tag_idxname,
28136
- (bs*(relpages)/(1024*1024))::float as real_size_mib,
28137
- (pg_relation_size(tbloid)/(1024*1024))::float as table_size_mib,
28138
- (bs*(relpages-est_pages))::float as extra_size,
28139
- 100 * (relpages-est_pages)::float / relpages as extra_pct,
28140
- fillfactor,
28141
- case when relpages > est_pages_ff
28142
- then bs*(relpages-est_pages_ff)
28143
- else 0
28144
- end as bloat_size,
28145
- 100 * (relpages-est_pages_ff)::float / relpages as bloat_pct,
28146
- is_na
28147
- -- , 100-(pst).avg_leaf_density as pst_avg_bloat, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples, relpages -- (DEBUG INFO)
28148
- from (
28283
+ 11: `with bloat_data as ( /* pgwatch_generated */
28284
+ select
28285
+ nspname, tblname, idxname,
28286
+ (bs*(relpages)/(1024*1024))::float as real_size_mib,
28287
+ (pg_relation_size(tbloid)/(1024*1024))::float as table_size_mib,
28288
+ (bs*(relpages-est_pages))::float as extra_size,
28289
+ 100 * (relpages-est_pages)::float / relpages as extra_pct,
28290
+ fillfactor,
28291
+ case when relpages > est_pages_ff
28292
+ then bs*(relpages-est_pages_ff)
28293
+ else 0
28294
+ end as bloat_size,
28295
+ 100 * (relpages-est_pages_ff)::float / relpages as bloat_pct,
28296
+ is_na,
28297
+ -- carried for the $other$ aggregate denominators
28298
+ bs, relpages, est_pages, est_pages_ff
28299
+ from (
28149
28300
  select coalesce(1 +
28150
28301
  ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
28151
28302
  ) as est_pages,
@@ -28232,8 +28383,55 @@ from (
28232
28383
  ) as rows_hdr_pdg_stats
28233
28384
  ) as relation_stats
28234
28385
  where (bs * relpages::float / (1024 * 1024)) > 1 /* exclude indexes below 1 MiB */
28235
- order by is_na = 0 desc, bloat_pct desc
28236
- limit 1000
28386
+ ),
28387
+ ranked as (
28388
+ select
28389
+ row_number() over (
28390
+ order by is_na = 0 desc, bloat_pct desc nulls last,
28391
+ nspname, tblname, idxname
28392
+ ) as rownum,
28393
+ *
28394
+ from bloat_data
28395
+ )
28396
+ select
28397
+ current_database() as tag_datname,
28398
+ nspname as tag_schemaname,
28399
+ tblname as tag_tblname,
28400
+ idxname as tag_idxname,
28401
+ real_size_mib,
28402
+ table_size_mib,
28403
+ extra_size,
28404
+ extra_pct,
28405
+ fillfactor,
28406
+ bloat_size,
28407
+ bloat_pct,
28408
+ is_na
28409
+ from ranked
28410
+ where rownum <= 100
28411
+ union all
28412
+ select
28413
+ current_database() as tag_datname,
28414
+ '$other$'::text as tag_schemaname,
28415
+ '$other$'::text as tag_tblname,
28416
+ '$other$'::text as tag_idxname,
28417
+ coalesce(sum(real_size_mib), 0)::float as real_size_mib,
28418
+ 0::float as table_size_mib,
28419
+ coalesce(sum(extra_size), 0)::float as extra_size,
28420
+ case when sum(relpages) > 0
28421
+ then 100 * sum(greatest(relpages - est_pages, 0))::float / sum(relpages)
28422
+ else 0
28423
+ end::float as extra_pct,
28424
+ coalesce(avg(fillfactor), 90)::smallint as fillfactor,
28425
+ coalesce(sum(bloat_size), 0)::float as bloat_size,
28426
+ case when sum(relpages) > 0
28427
+ then 100 * sum(greatest(relpages - est_pages_ff, 0))::float / sum(relpages)
28428
+ else 0
28429
+ end::float as bloat_pct,
28430
+ coalesce(max(is_na), 0)::int as is_na
28431
+ from ranked
28432
+ where rownum > 100
28433
+ group by ()
28434
+ having count(*) > 0
28237
28435
  `
28238
28436
  },
28239
28437
  gauges: ["real_size_mib", "table_size_mib", "extra_size", "extra_pct", "fillfactor", "bloat_size", "bloat_pct", "is_na", "reltuples"],
@@ -33353,6 +33551,35 @@ function stripMatchingQuotes(value) {
33353
33551
  }
33354
33552
  return trimmed;
33355
33553
  }
33554
/**
 * Keys that must be present in the monitoring stack's `.env` file.
 *
 * Each entry carries the key name, a factory that mints a default value when
 * the key is missing, and the CLI version that introduced the key.
 */
var REQUIRED_ENV_KEYS = [
  { key: "REPLICATOR_PASSWORD", defaultValue: () => crypto2.randomBytes(32).toString("hex"), introducedIn: "0.13" },
  { key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
  { key: "VM_AUTH_PASSWORD", defaultValue: () => crypto2.randomBytes(18).toString("base64"), introducedIn: "0.15" }
];

/**
 * Append every missing required key to `<projectDir>/.env`.
 *
 * Existing content is kept verbatim; the function only appends. The file is
 * rewritten through a temporary sibling file that is then renamed into place,
 * so an interrupted write cannot leave a truncated `.env`, and the result has
 * 0600 permissions even when the previous file was more permissive.
 *
 * @param {string} projectDir Directory that holds (or will hold) the `.env` file.
 * @returns {string[]} Names of the keys that were added, in contract order;
 *   empty when nothing was missing (the file is not touched in that case).
 * @throws {Error} Propagates any filesystem error from reading or writing.
 */
function ensureRequiredEnvVars(projectDir) {
  const envFile = path7.resolve(projectDir, ".env");
  const envExists = fs8.existsSync(envFile);
  const existing = envExists ? fs8.readFileSync(envFile, "utf8") : "";
  const added = [];
  const appendLines = [];
  for (const spec of REQUIRED_ENV_KEYS) {
    // Tolerate indentation, an `export ` prefix and spaces before `=` so a key
    // the user already defined in one of those forms is not appended again
    // (a duplicate appended later in the file would replace the user's value).
    const re = new RegExp(`^[ \\t]*(?:export[ \\t]+)?${spec.key}[ \\t]*=`, "m");
    if (!re.test(existing)) {
      appendLines.push(`${spec.key}=${spec.defaultValue()}`);
      added.push(spec.key);
    }
  }
  if (appendLines.length === 0) {
    return added;
  }
  const needsTrailingNewline = existing.length > 0 && !existing.endsWith("\n");
  const newContent = existing + (needsTrailingNewline ? "\n" : "") + appendLines.join("\n") + "\n";
  // Write next to the real file (following a symlinked .env) so the rename
  // stays on one filesystem and does not replace the link itself.
  const targetFile = envExists ? fs8.realpathSync(envFile) : envFile;
  const tmpFile = `${targetFile}.${process.pid}.tmp`;
  try {
    fs8.writeFileSync(tmpFile, newContent, { encoding: "utf8", mode: 0o600 });
    // `mode` is only honoured when the file is created; enforce it explicitly
    // in case a stale temp file with looser permissions was reused.
    fs8.chmodSync(tmpFile, 0o600);
    fs8.renameSync(tmpFile, targetFile);
  } catch (err) {
    fs8.rmSync(tmpFile, { force: true });
    throw err;
  }
  return added;
}
33356
33583
  async function execFilePromise(file, args) {
33357
33584
  return new Promise((resolve8, reject) => {
33358
33585
  childProcess.execFile(file, args, (error2, stdout, stderr) => {
@@ -34979,6 +35206,14 @@ function updatePgwatchConfig(configPath, updates) {
34979
35206
  `) + `
34980
35207
  `, { encoding: "utf8", mode: 384 });
34981
35208
  }
35209
/**
 * Apply the current monitoring-target list to the running stack.
 *
 * Runs the `sources-generator` compose service and, when that exits with 0,
 * force-recreates the `pgwatch-prometheus` and `pgwatch-postgres` services.
 *
 * @returns {Promise<number>} The generator's exit code when it is non-zero,
 *   otherwise whatever the recreate `runCompose` call resolves to.
 */
async function applyMonitoringTargetsConfig() {
  console.log("Applying monitoring target configuration...");
  const generatorExit = await runCompose(["run", "--rm", "sources-generator"]);
  if (generatorExit === 0) {
    console.log("Restarting pgwatch collectors to pick up target changes...");
    return runCompose(["up", "-d", "--force-recreate", "pgwatch-prometheus", "pgwatch-postgres"]);
  }
  // Generation failed: skip the restart and hand the failure code back.
  return generatorExit;
}
34982
35217
  async function runCompose(args, grafanaPassword) {
34983
35218
  let composeFile;
34984
35219
  let projectDir;
@@ -35627,29 +35862,61 @@ Instances configuration:
35627
35862
  console.log();
35628
35863
  }
35629
35864
  });
35630
- mon.command("update-config").description("apply monitoring services configuration (generate sources)").action(async () => {
35865
// `mon update-config`: make sure `.env` has every required key, then run the
// `sources-generator` compose service. Any failure while resolving the project
// directory or migrating `.env` is reported on stderr and sets exit code 1
// instead of escaping the action as an unhandled rejection (this mirrors how
// `mon update` runs the same migration inside its try block).
mon.command("update-config").description("apply monitoring services configuration (generate sources, migrate .env)").action(async () => {
  let added;
  try {
    const { projectDir } = await resolveOrInitPaths();
    // Filesystem errors (unreadable/unwritable .env) surface here.
    added = ensureRequiredEnvVars(projectDir);
  } catch (error2) {
    const message = error2 instanceof Error ? error2.message : String(error2);
    console.error(message);
    process.exitCode = 1;
    return;
  }
  if (added.length > 0) {
    console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
    console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
  }
  const code = await runCompose(["run", "--rm", "sources-generator"]);
  if (code !== 0)
    process.exitCode = code;
});
35635
- mon.command("update").description("update monitoring stack").action(async () => {
35885
+ mon.command("update").description("update monitoring stack (migrate .env, pull images)").action(async () => {
35636
35886
  console.log(`Updating PostgresAI monitoring stack...
35637
35887
  `);
35638
35888
  try {
35639
- const gitDir = path7.resolve(process.cwd(), ".git");
35640
- if (!fs8.existsSync(gitDir)) {
35641
- console.error("Not a git repository. Cannot update.");
35889
+ let projectDir;
35890
+ try {
35891
+ ({ projectDir } = await resolveOrInitPaths());
35892
+ } catch (error2) {
35893
+ const message = error2 instanceof Error ? error2.message : String(error2);
35894
+ console.error(message);
35642
35895
  process.exitCode = 1;
35643
35896
  return;
35644
35897
  }
35645
- console.log("Fetching latest changes...");
35646
- await execFilePromise("git", ["fetch", "origin"]);
35647
- const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
35648
- const currentBranch = branch.trim();
35649
- console.log(`Current branch: ${currentBranch}`);
35650
- console.log("Pulling latest changes...");
35651
- const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
35652
- console.log(pullOut);
35898
+ console.log("Checking .env for newly-required keys...");
35899
+ const added = ensureRequiredEnvVars(projectDir);
35900
+ if (added.length > 0) {
35901
+ console.log(`\u2713 Added missing .env keys: ${added.join(", ")}`);
35902
+ console.log(" (existing values preserved; missing keys filled with safe defaults)");
35903
+ } else {
35904
+ console.log("\u2713 .env is up to date");
35905
+ }
35906
+ console.log();
35907
+ const gitDir = path7.resolve(projectDir, ".git");
35908
+ if (fs8.existsSync(gitDir)) {
35909
+ console.log("Fetching latest changes...");
35910
+ await execFilePromise("git", ["fetch", "origin"]);
35911
+ const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
35912
+ const currentBranch = branch.trim();
35913
+ console.log(`Current branch: ${currentBranch}`);
35914
+ console.log("Pulling latest changes...");
35915
+ const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
35916
+ console.log(pullOut);
35917
+ } else {
35918
+ console.log("(not a git checkout \u2014 skipping git fetch/pull and going straight to image pull)");
35919
+ }
35653
35920
  console.log(`
35654
35921
  Updating Docker images...`);
35655
35922
  const code = await runCompose(["pull"]);
@@ -35840,6 +36107,13 @@ targets.command("add [connStr] [name]").description("add monitoring target datab
35840
36107
  try {
35841
36108
  addInstanceToFile(file, buildInstance(instanceName, connStr));
35842
36109
  console.log(`Monitoring target '${instanceName}' added`);
36110
+ const applyCode = await applyMonitoringTargetsConfig();
36111
+ if (applyCode !== 0) {
36112
+ console.error("Monitoring target was saved, but applying the generated pgwatch sources failed. Run 'postgresai mon restart' to apply manually.");
36113
+ process.exitCode = 1;
36114
+ return;
36115
+ }
36116
+ console.log("\u2713 Monitoring target configuration applied");
35843
36117
  } catch (err) {
35844
36118
  const message = err instanceof Error ? err.message : String(err);
35845
36119
  console.error(message);
@@ -35861,6 +36135,13 @@ targets.command("remove <name>").description("remove monitoring target database"
35861
36135
  return;
35862
36136
  }
35863
36137
  console.log(`Monitoring target '${name}' removed`);
36138
+ const applyCode = await applyMonitoringTargetsConfig();
36139
+ if (applyCode !== 0) {
36140
+ console.error("Monitoring target was removed, but applying the generated pgwatch sources failed. Run 'postgresai mon restart' to apply manually.");
36141
+ process.exitCode = 1;
36142
+ return;
36143
+ }
36144
+ console.log("\u2713 Monitoring target configuration applied");
35864
36145
  } catch (err) {
35865
36146
  const message = err instanceof Error ? err.message : String(err);
35866
36147
  console.error(`Error processing instances.yml: ${message}`);
@@ -57,17 +57,6 @@ export function getCheckupEntry(code: string): CheckupDictionaryEntry | null {
57
57
  return dictionaryByCode.get(code.toUpperCase()) ?? null;
58
58
  }
59
59
 
60
- /**
61
- * Get the title for a checkup code.
62
- *
63
- * @param code - The check code (e.g., "A001", "H002")
64
- * @returns The title or the code itself if not found
65
- */
66
- export function getCheckupTitle(code: string): string {
67
- const entry = getCheckupEntry(code);
68
- return entry?.title ?? code;
69
- }
70
-
71
60
  /**
72
61
  * Check if a code exists in the dictionary.
73
62
  *
package/lib/checkup.ts CHANGED
@@ -2,41 +2,41 @@
2
2
  * Express Checkup Module
3
3
  * ======================
4
4
  * Generates JSON health check reports directly from PostgreSQL without Prometheus.
5
- *
5
+ *
6
6
  * ARCHITECTURAL DECISIONS
7
7
  * -----------------------
8
- *
8
+ *
9
9
  * 1. SINGLE SOURCE OF TRUTH FOR SQL QUERIES
10
- * Complex metrics (index health, settings, db_stats) are loaded from
10
+ * Complex metrics (index health, settings, db_stats) are loaded from
11
11
  * config/pgwatch-prometheus/metrics.yml via getMetricSql() from metrics-loader.ts.
12
- *
12
+ *
13
13
  * Simple queries (version, database list, connection states, uptime) use
14
14
  * inline SQL as they're trivial and CLI-specific.
15
- *
15
+ *
16
16
  * 2. JSON SCHEMA COMPLIANCE
17
17
  * All generated reports MUST comply with JSON schemas in reporter/schemas/.
18
18
  * These schemas define the expected format for both:
19
19
  * - Full-fledged monitoring reporter output
20
20
  * - Express checkup output
21
- *
21
+ *
22
22
  * Before adding or modifying a report, verify the corresponding schema exists
23
23
  * and ensure the output matches. Run schema validation tests to confirm.
24
- *
24
+ *
25
25
  * 3. ERROR HANDLING STRATEGY
26
26
  * Functions follow two patterns based on criticality:
27
- *
27
+ *
28
28
  * PROPAGATING (throws on error):
29
29
  * - Core data functions: getPostgresVersion, getSettings, getAlteredSettings,
30
30
  * getDatabaseSizes, getInvalidIndexes, getUnusedIndexes, getRedundantIndexes
31
31
  * - If these fail, the entire report should fail (data is required)
32
32
  * - Callers should handle errors at the report generation level
33
- *
33
+ *
34
34
  * GRACEFUL DEGRADATION (catches errors, includes error in output):
35
35
  * - Optional/supplementary queries: pg_stat_statements, pg_stat_kcache checks,
36
36
  * memory calculations, postmaster startup time
37
37
  * - These are nice-to-have; missing data shouldn't fail the whole report
38
38
  * - Errors are logged and included in report output for visibility
39
- *
39
+ *
40
40
  * ADDING NEW REPORTS
41
41
  * ------------------
42
42
  * 1. Add/verify the metric exists in config/pgwatch-prometheus/metrics.yml
@@ -51,7 +51,7 @@ import * as fs from "fs";
51
51
  import * as path from "path";
52
52
  import * as pkg from "../package.json";
53
53
  import { getMetricSql, transformMetricRow, METRIC_NAMES } from "./metrics-loader";
54
- import { getCheckupTitle, buildCheckInfoMap } from "./checkup-dictionary";
54
+ import { buildCheckInfoMap } from "./checkup-dictionary";
55
55
 
56
56
  // Time constants
57
57
  const SECONDS_PER_DAY = 86400;
@@ -336,7 +336,7 @@ export function parseVersionNum(versionNum: string): { major: string; minor: str
336
336
  /**
337
337
  * Format bytes to human readable string using binary units (1024-based).
338
338
  * Uses IEC standard: KiB, MiB, GiB, etc.
339
- *
339
+ *
340
340
  * Note: PostgreSQL's pg_size_pretty() uses kB/MB/GB with 1024 base (technically
341
341
  * incorrect SI usage), but we follow IEC binary units per project style guide.
342
342
  */
@@ -387,7 +387,7 @@ function formatSettingPrettyValue(
387
387
  /**
388
388
  * Get PostgreSQL version information.
389
389
  * Uses simple inline SQL (trivial query, CLI-specific).
390
- *
390
+ *
391
391
  * @throws {Error} If database query fails (propagating - critical data)
392
392
  */
393
393
  export async function getPostgresVersion(client: Client): Promise<PostgresVersion> {
@@ -1084,7 +1084,7 @@ export const generateH004 = (client: Client, nodeName = "node-01") =>
1084
1084
 
1085
1085
  /**
1086
1086
  * Generate D004 report - pg_stat_statements and pg_stat_kcache settings.
1087
- *
1087
+ *
1088
1088
  * Uses graceful degradation: extension queries are wrapped in try-catch
1089
1089
  * because extensions may not be installed. Errors are included in the
1090
1090
  * report output rather than failing the entire report.
package/lib/init.ts CHANGED
@@ -87,7 +87,7 @@ export type AdminConnection = {
87
87
  /**
88
88
  * Check if an error indicates SSL negotiation failed and fallback to non-SSL should be attempted.
89
89
  * This mimics libpq's sslmode=prefer behavior.
90
- *
90
+ *
91
91
  * IMPORTANT: This should NOT match certificate errors (expired, invalid, self-signed)
92
92
  * as those are real errors the user needs to fix, not negotiation failures.
93
93
  */
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Metrics loader for express checkup reports
3
- *
3
+ *
4
4
  * Loads SQL queries from embedded metrics data (generated from metrics.yml at build time).
5
5
  * Provides version-aware query selection and row transformation utilities.
6
6
  */
@@ -9,7 +9,7 @@ import { METRICS, MetricDefinition } from "./metrics-embedded";
9
9
 
10
10
  /**
11
11
  * Get SQL query for a specific metric, selecting the appropriate version.
12
- *
12
+ *
13
13
  * @param metricName - Name of the metric (e.g., "settings", "db_stats")
14
14
  * @param pgMajorVersion - PostgreSQL major version (default: 16)
15
15
  * @returns SQL query string
@@ -41,7 +41,7 @@ export function getMetricSql(metricName: string, pgMajorVersion: number = 16): s
41
41
 
42
42
  /**
43
43
  * Get metric definition including all metadata.
44
- *
44
+ *
45
45
  * @param metricName - Name of the metric
46
46
  * @returns MetricDefinition or undefined if not found
47
47
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "postgresai",
3
- "version": "0.15.0-rc.4",
3
+ "version": "0.15.0-rc.6",
4
4
  "description": "postgres_ai CLI",
5
5
  "license": "Apache-2.0",
6
6
  "private": false,
@@ -0,0 +1,28 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { readFileSync } from "fs";
3
+ import { resolve } from "path";
4
+
5
+ const cliSource = readFileSync(resolve(import.meta.dir, "../bin/postgres-ai.ts"), "utf8");
6
+
7
/**
 * Source-level checks that `targets add` / `targets remove` apply the new
 * target list (regenerate sources, restart collectors) after saving it.
 */
describe("mon targets configuration apply", () => {
  const APPLY_CALL = "await applyMonitoringTargetsConfig()";

  /**
   * Return the CLI source from `marker` up to the next `.command(` registration
   * (or to the end of the file when there is none).
   *
   * Bounding the slice keeps each test scoped to a single command handler, so
   * an apply call that belongs to a later command cannot satisfy the check for
   * an earlier one.
   */
  function handlerSourceFrom(marker: string): string {
    const start = cliSource.indexOf(marker);
    expect(start).toBeGreaterThan(-1);
    const nextCommand = cliSource.indexOf(".command(", start);
    return cliSource.slice(start, nextCommand === -1 ? undefined : nextCommand);
  }

  test("targets add regenerates sources and restarts pgwatch collectors after saving", () => {
    expect(cliSource).toContain("async function applyMonitoringTargetsConfig()");
    expect(cliSource).toContain('runCompose(["run", "--rm", "sources-generator"])');
    expect(cliSource).toContain(
      'runCompose(["up", "-d", "--force-recreate", "pgwatch-prometheus", "pgwatch-postgres"])'
    );

    const handler = handlerSourceFrom("addInstanceToFile(file, buildInstance(instanceName, connStr))");
    // The slice starts at the save call, so any hit is strictly after it.
    expect(handler.indexOf(APPLY_CALL)).toBeGreaterThan(0);
  });

  test("targets remove regenerates sources and restarts pgwatch collectors after saving", () => {
    const handler = handlerSourceFrom("removeInstanceFromFile(file, name)");
    expect(handler.indexOf(APPLY_CALL)).toBeGreaterThan(0);
  });
});
@@ -420,3 +420,126 @@ describe("upgrade CLI commands", () => {
420
420
  expect(stdout).toMatch(/health/i);
421
421
  }, { timeout: TEST_TIMEOUT });
422
422
  });
423
+
424
describe("in-place upgrade env migration (mon update / update-config)", () => {
  /**
   * Regression coverage for the 0.14 -> 0.15 in-place upgrade gap (#203).
   *
   * A `.env` created by 0.14 has no VM_AUTH_USERNAME / VM_AUTH_PASSWORD, and
   * sink-prometheus then exited with:
   *
   *   fatal cannot read "/postgres_ai_configs/prometheus/prometheus.yml":
   *   cannot expand environment variables: missing "VM_AUTH_USERNAME" env var
   *
   * Both `mon update` and `mon update-config` are expected to add the missing
   * keys to `.env` before they do anything else, without touching keys that
   * are already there.
   */

  let tempDir: string;

  /** Create a minimal project directory under `tempDir` with the given `.env` body. */
  const scaffoldProject = (name: string, envBody: string): string => {
    const dir = resolve(tempDir, name);
    fs.mkdirSync(dir, { recursive: true });
    fs.writeFileSync(resolve(dir, ".env"), envBody);
    fs.writeFileSync(resolve(dir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
    fs.writeFileSync(resolve(dir, "instances.yml"), "# instances\n");
    return dir;
  };

  /** Read back the project's `.env` as text. */
  const readEnv = (dir: string): string => fs.readFileSync(resolve(dir, ".env"), "utf8");

  beforeAll(() => {
    tempDir = fs.mkdtempSync(resolve(os.tmpdir(), "pgai-upgrade-env-migration-"));
  });

  afterAll(() => {
    if (tempDir && fs.existsSync(tempDir)) {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  });

  test("mon update-config appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    // 0.14-shaped file: PGAI_TAG is set, VM_AUTH_* are not.
    const projectDir = scaffoldProject(
      "update-config-0.14-env",
      "PGAI_TAG=0.14.0\nGF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw\n",
    );

    // The compose step is expected to fail without Docker; the .env migration happens before it.
    runCliInDir(["mon", "update-config"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    // Pre-existing entries stay exactly as written.
    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^GF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw$/m);

    // Newly required keys: fixed username plus a non-empty base64 password.
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // REPLICATOR_PASSWORD predates 0.15 but belongs to the same contract.
    expect(migrated).toMatch(/^REPLICATOR_PASSWORD=[a-f0-9]{64}$/m);
  }, { timeout: TEST_TIMEOUT });

  test("mon update appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    const projectDir = scaffoldProject("update-0.14-env", "PGAI_TAG=0.14.0\n");

    // `mon update` is expected to fail later (no Docker, no git repo); the .env migration happens first.
    const { stdout } = runCliInDir(["mon", "update"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // The user should be told which keys were added.
    expect(stdout).toMatch(/Added missing \.env keys/);
    expect(stdout).toMatch(/VM_AUTH_USERNAME/);
    expect(stdout).toMatch(/VM_AUTH_PASSWORD/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update preserves existing VM_AUTH_* values (no rotation)", () => {
    // VM auth is already configured (e.g. via rotate-vm-auth.sh).
    const projectDir = scaffoldProject(
      "update-preserve-vm-auth",
      `PGAI_TAG=0.15.0\nVM_AUTH_USERNAME=custom-user\nVM_AUTH_PASSWORD=custom-pw-do-not-rotate\nREPLICATOR_PASSWORD=${"a".repeat(64)}\n`,
    );

    const { stdout } = runCliInDir(["mon", "update"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^VM_AUTH_USERNAME=custom-user$/m);
    expect(migrated).toMatch(/^VM_AUTH_PASSWORD=custom-pw-do-not-rotate$/m);
    expect(migrated).toMatch(/^REPLICATOR_PASSWORD=a{64}$/m);

    // With nothing missing, the migration step reports that instead.
    expect(stdout).toMatch(/\.env is up to date/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update-config handles a .env that doesn't end with a newline", () => {
    // Without a separator the first appended key would be glued to the last
    // existing line, e.g. `PGAI_TAG=0.14.0VM_AUTH_USERNAME=vmauth`.
    const projectDir = scaffoldProject("update-config-no-trailing-newline", "PGAI_TAG=0.14.0");

    runCliInDir(["mon", "update-config"], projectDir, { PGAI_TAG: undefined });

    const migrated = readEnv(projectDir);

    expect(migrated).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(migrated).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    // Nothing may be concatenated onto the previous line.
    expect(migrated).not.toMatch(/PGAI_TAG=0\.14\.0VM_AUTH_USERNAME/);
  }, { timeout: TEST_TIMEOUT });
});