postgresai 0.16.0-dev.0 → 0.16.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13425,7 +13425,7 @@ var {
13425
13425
  // package.json
13426
13426
  var package_default = {
13427
13427
  name: "postgresai",
13428
- version: "0.16.0-dev.0",
13428
+ version: "0.16.0-dev.1",
13429
13429
  description: "postgres_ai CLI",
13430
13430
  license: "Apache-2.0",
13431
13431
  private: false,
@@ -16256,7 +16256,7 @@ var Result = import_lib.default.Result;
16256
16256
  var TypeOverrides = import_lib.default.TypeOverrides;
16257
16257
  var defaults = import_lib.default.defaults;
16258
16258
  // package.json
16259
- var version = "0.16.0-dev.0";
16259
+ var version = "0.16.0-dev.1";
16260
16260
  var package_default2 = {
16261
16261
  name: "postgresai",
16262
16262
  version,
@@ -28498,7 +28498,7 @@ function grafanaBaseUrl() {
28498
28498
  return (process.env.PGAI_GRAFANA_LOCAL_URL || "http://localhost:3000").replace(/\/+$/, "");
28499
28499
  }
28500
28500
  function grafanaAdminUser() {
28501
- return process.env.GF_SECURITY_ADMIN_USER || "admin";
28501
+ return process.env.GF_SECURITY_ADMIN_USER || "monitor";
28502
28502
  }
28503
28503
  function parseVcpus(raw) {
28504
28504
  if (raw === undefined || raw === null || raw === "")
@@ -28575,9 +28575,14 @@ async function resolveDatasourceId(adminPassword, debug = false) {
28575
28575
  return null;
28576
28576
  const list = await res.json().catch(() => []);
28577
28577
  const prom = list.filter((d) => d.type === "prometheus");
28578
- if (prom.length !== 1) {
28578
+ if (prom.length > 1) {
28579
28579
  if (debug)
28580
- console.error(`Debug: AAS: expected 1 prometheus datasource, found ${prom.length}`);
28580
+ console.error(`Debug: AAS: ${prom.length} prometheus datasources (ambiguous); not retrying`);
28581
+ return "ambiguous";
28582
+ }
28583
+ if (prom.length === 0) {
28584
+ if (debug)
28585
+ console.error(`Debug: AAS: no prometheus datasource resolvable yet`);
28581
28586
  return null;
28582
28587
  }
28583
28588
  return typeof prom[0].id === "number" ? prom[0].id : null;
@@ -28593,7 +28598,23 @@ async function registerAasCollection(apiKey, instanceId, opts) {
28593
28598
  const labels = resolveAasLabels(opts.instancesPath);
28594
28599
  if (!labels)
28595
28600
  return { ok: false, reason: "could not determine a single (cluster, node_name) target" };
28596
- const datasourceId = await resolveDatasourceId(opts.grafanaPassword, debug);
28601
+ const maxAttempts = opts.datasourceMaxAttempts ?? 20;
28602
+ const retryDelayMs = opts.datasourceRetryDelayMs ?? 3000;
28603
+ let datasourceId = null;
28604
+ for (let attempt = 1;attempt <= maxAttempts; attempt++) {
28605
+ const resolved = await resolveDatasourceId(opts.grafanaPassword, debug);
28606
+ if (typeof resolved === "number") {
28607
+ datasourceId = resolved;
28608
+ break;
28609
+ }
28610
+ if (resolved === "ambiguous")
28611
+ break;
28612
+ if (attempt < maxAttempts) {
28613
+ if (debug)
28614
+ console.error(`Debug: AAS: datasource not resolvable yet (attempt ${attempt}/${maxAttempts}); waiting for Grafana…`);
28615
+ await new Promise((resolve4) => setTimeout(resolve4, retryDelayMs));
28616
+ }
28617
+ }
28597
28618
  if (datasourceId == null)
28598
28619
  return { ok: false, reason: "could not resolve the Prometheus datasource id" };
28599
28620
  const saToken = await mintAasServiceAccountToken(opts.grafanaPassword, debug);
@@ -28,7 +28,11 @@ function grafanaBaseUrl(): string {
28
28
  }
29
29
 
30
30
  function grafanaAdminUser(): string {
31
- return process.env.GF_SECURITY_ADMIN_USER || "admin";
31
+ // The monitoring stack's compose hardcodes the Grafana admin user to
32
+ // "monitor" (GF_SECURITY_ADMIN_USER: monitor), so default to that rather than
33
+ // Grafana's stock "admin" — otherwise AAS arming logs in as the wrong user
34
+ // and every datasource lookup 401s. An explicit env override still wins.
35
+ return process.env.GF_SECURITY_ADMIN_USER || "monitor";
32
36
  }
33
37
 
34
38
  /** Parse a vcpus input (flag/env) to a non-negative integer; 0 = "unknown" fallback. */
@@ -130,17 +134,26 @@ export async function mintAasServiceAccountToken(
130
134
  * Resolve the single Prometheus-typed datasource's numeric id on the local
131
135
  * Grafana. The monitoring stack's VictoriaMetrics datasource is type
132
136
  * "prometheus" (VM speaks PromQL), and the stack registers exactly one such
133
- * datasource — the same one the collector queries. >1 or 0 → null (skip),
134
- * matching v1.aas_onboard's discovery contract.
137
+ * datasource — the same one the collector queries. 0 / API-not-ready → null
138
+ * (a provisioning transient — the readiness loop retries); >1 → "ambiguous"
139
+ * (a permanent misconfiguration — the loop stops at once), matching
140
+ * v1.aas_onboard's >1 skip.
135
141
  */
136
- export async function resolveDatasourceId(adminPassword: string, debug = false): Promise<number | null> {
142
+ export async function resolveDatasourceId(adminPassword: string, debug = false): Promise<number | "ambiguous" | null> {
137
143
  try {
138
144
  const res = await grafanaApi("GET", "/api/datasources", adminPassword);
139
145
  if (!res.ok) return null;
140
146
  const list = (await res.json().catch(() => [])) as Array<{ id?: unknown; type?: unknown }>;
141
147
  const prom = list.filter((d) => d.type === "prometheus");
142
- if (prom.length !== 1) {
143
- if (debug) console.error(`Debug: AAS: expected 1 prometheus datasource, found ${prom.length}`);
148
+ if (prom.length > 1) {
149
+ // >1 is a permanent misconfiguration, not a provisioning transient: the
150
+ // datasource count only grows as Grafana provisions, so retrying can never
151
+ // resolve it. Signal a definitive skip so the readiness loop bails at once.
152
+ if (debug) console.error(`Debug: AAS: ${prom.length} prometheus datasources (ambiguous); not retrying`);
153
+ return "ambiguous";
154
+ }
155
+ if (prom.length === 0) {
156
+ if (debug) console.error(`Debug: AAS: no prometheus datasource resolvable yet`);
144
157
  return null;
145
158
  }
146
159
  return typeof prom[0].id === "number" ? prom[0].id : null;
@@ -169,6 +182,10 @@ export async function registerAasCollection(
169
182
  apiBaseUrl?: string;
170
183
  debug?: boolean;
171
184
  fetchImpl?: typeof fetch;
185
+ // Grafana-readiness polling for the datasource lookup (Grafana has just
186
+ // been started by `compose up`). Defaults: 20 attempts × 3s.
187
+ datasourceMaxAttempts?: number;
188
+ datasourceRetryDelayMs?: number;
172
189
  }
173
190
  ): Promise<AasRegisterResult> {
174
191
  const debug = !!opts.debug;
@@ -178,7 +195,24 @@ export async function registerAasCollection(
178
195
  const labels = resolveAasLabels(opts.instancesPath);
179
196
  if (!labels) return { ok: false, reason: "could not determine a single (cluster, node_name) target" };
180
197
 
181
- const datasourceId = await resolveDatasourceId(opts.grafanaPassword, debug);
198
+ // Grafana was just started by `compose up`; it needs time to create its
199
+ // admin user, provision datasources, and serve its API. Querying too early
200
+ // makes the datasource lookup fail transiently, so poll until it resolves
201
+ // (best-effort, capped — the install never blocks on this).
202
+ const maxAttempts = opts.datasourceMaxAttempts ?? 20;
203
+ const retryDelayMs = opts.datasourceRetryDelayMs ?? 3000;
204
+ let datasourceId: number | null = null;
205
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
206
+ const resolved = await resolveDatasourceId(opts.grafanaPassword, debug);
207
+ if (typeof resolved === "number") { datasourceId = resolved; break; }
208
+ // "ambiguous" (>1 prometheus datasource) is permanent — retrying can't fix
209
+ // it, so stop polling immediately instead of waiting out the whole budget.
210
+ if (resolved === "ambiguous") break;
211
+ if (attempt < maxAttempts) {
212
+ if (debug) console.error(`Debug: AAS: datasource not resolvable yet (attempt ${attempt}/${maxAttempts}); waiting for Grafana…`);
213
+ await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
214
+ }
215
+ }
182
216
  if (datasourceId == null) return { ok: false, reason: "could not resolve the Prometheus datasource id" };
183
217
 
184
218
  const saToken = await mintAasServiceAccountToken(opts.grafanaPassword, debug);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "postgresai",
3
- "version": "0.16.0-dev.0",
3
+ "version": "0.16.0-dev.1",
4
4
  "description": "postgres_ai CLI",
5
5
  "license": "Apache-2.0",
6
6
  "private": false,
@@ -198,6 +198,8 @@ describe("registerAasCollection", () => {
198
198
  installFetch({ prometheusCount: n });
199
199
  const r = await registerAasCollection("apikey-1", "inst-123", {
200
200
  grafanaPassword: "pw", instancesPath, vcpus: 8, apiBaseUrl: "https://api.test",
201
+ // 0/>1 is a definitive skip; cap the readiness retry so the test stays fast.
202
+ datasourceMaxAttempts: 2, datasourceRetryDelayMs: 0,
201
203
  });
202
204
  expect(r.ok).toBe(false);
203
205
  expect(r.reason).toContain("datasource");
@@ -205,6 +207,88 @@ describe("registerAasCollection", () => {
205
207
  }
206
208
  });
207
209
 
210
+ test("polls the datasource until Grafana is ready, then registers", async () => {
211
+ // Grafana isn't ready on the first probes (no prometheus datasource yet),
212
+ // then it provisions — the readiness retry must keep going and then succeed.
213
+ let dsProbes = 0;
214
+ calls = [];
215
+ fetchSpy = spyOn(globalThis, "fetch").mockImplementation((async (input: unknown, init?: { method?: string; body?: string }) => {
216
+ const url = String(input);
217
+ const method = (init?.method || "GET").toUpperCase();
218
+ calls.push({ url, method, body: init?.body });
219
+ if (url.includes("/api/serviceaccounts/search")) return res(true, 200, { serviceAccounts: [] });
220
+ if (url.match(/\/tokens$/) && method === "POST") return res(true, 200, { key: "glsa_mock" });
221
+ if (url.endsWith("/api/serviceaccounts") && method === "POST") return res(true, 201, { id: 42 });
222
+ if (url.includes("/api/datasources")) {
223
+ dsProbes++;
224
+ return dsProbes < 3
225
+ ? res(true, 200, [{ id: 3, type: "loki" }]) // not ready yet
226
+ : res(true, 200, [{ id: 8, type: "prometheus" }, { id: 3, type: "loki" }]);
227
+ }
228
+ if (url.includes("/rpc/monitoring_instance_aas_register")) return res(true, 200, {});
229
+ return res(false, 404, {});
230
+ }) as unknown as typeof fetch);
231
+
232
+ const r = await registerAasCollection("apikey-1", "inst-123", {
233
+ grafanaPassword: "pw", instancesPath, vcpus: 8, apiBaseUrl: "https://api.test",
234
+ datasourceMaxAttempts: 6, datasourceRetryDelayMs: 0,
235
+ });
236
+ expect(r.ok).toBe(true);
237
+ expect(dsProbes).toBeGreaterThanOrEqual(3); // kept polling past the not-ready probes
238
+ const rpc = calls.find((c) => c.url.includes("/rpc/monitoring_instance_aas_register"));
239
+ expect(rpc).toBeDefined();
240
+ expect(JSON.parse(rpc!.body!).datasource_id).toBe(8);
241
+ });
242
+
243
+ test(">1 prometheus datasource is a definitive skip: one probe, no retry", async () => {
244
+ // The >1 case is permanent (the datasource count only grows), so the
245
+ // readiness loop must bail after a single probe, not burn its whole budget.
246
+ let dsProbes = 0;
247
+ calls = [];
248
+ fetchSpy = spyOn(globalThis, "fetch").mockImplementation((async (input: unknown, init?: { method?: string; body?: string }) => {
249
+ const url = String(input);
250
+ const method = (init?.method || "GET").toUpperCase();
251
+ calls.push({ url, method, body: init?.body });
252
+ if (url.includes("/api/datasources")) {
253
+ dsProbes++;
254
+ return res(true, 200, [{ id: 8, type: "prometheus" }, { id: 9, type: "prometheus" }, { id: 3, type: "loki" }]);
255
+ }
256
+ return res(false, 404, {});
257
+ }) as unknown as typeof fetch);
258
+
259
+ const r = await registerAasCollection("apikey-1", "inst-123", {
260
+ grafanaPassword: "pw", instancesPath, vcpus: 8, apiBaseUrl: "https://api.test",
261
+ datasourceMaxAttempts: 5, datasourceRetryDelayMs: 0,
262
+ });
263
+ expect(r.ok).toBe(false);
264
+ expect(r.reason).toContain("datasource");
265
+ expect(dsProbes).toBe(1); // bailed after one probe; did NOT retry 5x
266
+ expect(calls.some((c) => c.url.includes("/rpc/monitoring_instance_aas_register"))).toBe(false);
267
+ });
268
+
269
+ test("never-ready datasource: polls exactly maxAttempts times, then ok:false", async () => {
270
+ // Bounds the readiness loop: a never-appearing datasource must probe exactly
271
+ // maxAttempts times (N probes, N-1 sleeps) and then give up — not loop forever.
272
+ let dsProbes = 0;
273
+ calls = [];
274
+ fetchSpy = spyOn(globalThis, "fetch").mockImplementation((async (input: unknown, init?: { method?: string; body?: string }) => {
275
+ const url = String(input);
276
+ const method = (init?.method || "GET").toUpperCase();
277
+ calls.push({ url, method, body: init?.body });
278
+ if (url.includes("/api/datasources")) { dsProbes++; return res(true, 200, [{ id: 3, type: "loki" }]); } // never a prometheus
279
+ return res(false, 404, {});
280
+ }) as unknown as typeof fetch);
281
+
282
+ const r = await registerAasCollection("apikey-1", "inst-123", {
283
+ grafanaPassword: "pw", instancesPath, vcpus: 8, apiBaseUrl: "https://api.test",
284
+ datasourceMaxAttempts: 3, datasourceRetryDelayMs: 0,
285
+ });
286
+ expect(r.ok).toBe(false);
287
+ expect(r.reason).toContain("datasource");
288
+ expect(dsProbes).toBe(3); // bounded: exactly maxAttempts probes
289
+ expect(calls.some((c) => c.url.includes("/rpc/monitoring_instance_aas_register"))).toBe(false);
290
+ });
291
+
208
292
  test("mint returning no key → ok:false, no RPC call", async () => {
209
293
  installFetch({ mintKey: null });
210
294
  const r = await registerAasCollection("apikey-1", "inst-123", {