@dbx-tools/appkit-autopg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,771 @@
1
+ /**
2
+ * Lakebase Postgres connection resolver.
3
+ *
4
+ * Reads the same env vars the `lakebase` plugin consumes (`PGHOST`,
5
+ * `PGDATABASE`, `PGPORT`, `PGSSLMODE`, `LAKEBASE_ENDPOINT`) and fills in
6
+ * whichever pieces are missing using the Lakebase Autoscaling REST API
7
+ * under `/api/2.0/postgres/` via the Databricks workspace client.
8
+ *
9
+ * `LAKEBASE_ENDPOINT` (and `config.endpoint`) accept anything
10
+ * {@link parseAddress} understands - canonical resource paths, Postgres
11
+ * URIs, bare hostnames, or bare project ids. The resolver layers
12
+ * whatever pieces fall out of parsing under explicit config / env
13
+ * values, then fills the remaining gaps via the API:
14
+ *
15
+ * 1. Reverse-lookup: when a host is known but no resource path is,
16
+ * scan projects -> branches -> endpoints for a matching
17
+ * `status.hosts.host` and recover the owning project/branch/endpoint.
18
+ * 2. Pick: when a project is known but child resources aren't, prefer
19
+ * the server-side default (`status.default`, `ENDPOINT_TYPE_READ_WRITE`,
20
+ * `databricks_postgres`) and fall back to "the only one" when a
21
+ * listing returns a single result.
22
+ * 3. Auto-create: when no projects exist at all, create one whose
23
+ * id defaults to `projectUtils.name()` slugified (override
24
+ * with `config.autoCreate: "my-id"` or disable with
25
+ * `config.autoCreate: false`). The create call is idempotent - an
26
+ * `ALREADY_EXISTS` response from a concurrent boot is treated as
27
+ * success. Then poll the default endpoint until it reports
28
+ * `current_state` `READY` or `IDLE`.
29
+ *
30
+ * The {@link autopg} helper then writes the resolved values back to
31
+ * `process.env` so the downstream `lakebase` plugin picks them up.
32
+ *
33
+ * @see https://docs.databricks.com/api/workspace/postgres
34
+ */
35
+
36
+ import { getWorkspaceClient } from "@databricks/appkit";
37
+ import { projectUtils, stringUtils, type logUtils } from "@dbx-tools/appkit-shared";
38
+ import { setTimeout as sleep } from "node:timers/promises";
39
+
40
+ import { parseAddress } from "./address.js";
41
+
42
/** Base path that every Lakebase Postgres REST request in this module is built on. */
const API_BASE = "/api/2.0/postgres";
/** Port used when config, `PGPORT`, and the parsed address all leave it unset. */
const DEFAULT_PORT = 5432;
/** TLS mode used when config, `PGSSLMODE`, and the parsed address all leave it unset. */
const DEFAULT_SSL_MODE: SslMode = "require";
/** `spec.pg_version` sent when this module creates a project. */
const DEFAULT_PG_VERSION = 17;
/** Lakebase project ids: `^[a-z][a-z0-9-]{0,61}[a-z0-9]$`. */
const PROJECT_ID_MAX_LEN = 63;
/** Upper bound (5 minutes) on polling a long-running operation. */
const OPERATION_TIMEOUT_MS = 5 * 60 * 1_000;
/** Delay between two polls of a long-running operation. */
const OPERATION_POLL_MS = 2 * 1_000;
/** Upper bound (5 minutes) on waiting for an endpoint to become usable. */
const ENDPOINT_READY_TIMEOUT_MS = 5 * 60 * 1_000;
/** Delay between two endpoint readiness polls. */
const ENDPOINT_READY_POLL_MS = 2 * 1_000;

// Canonical resource-path matchers. Capture groups are, in order:
// project id, branch id, then the leaf (endpoint / database) id.
const ENDPOINT_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)\/endpoints\/([^/]+)$/;
const DATABASE_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)\/databases\/([^/]+)$/;
const BRANCH_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)$/;
const PROJECT_NAME_RE = /^projects\/([^/]+)$/;
57
+
58
/** TLS negotiation mode handed through to `pg`. */
export type SslMode = "disable" | "prefer" | "require";
60
+
61
/**
 * Caller-provided inputs (from config or env) before any REST lookup.
 * All fields are optional; whatever is missing is resolved through the
 * Lakebase API once there is enough context (typically a `project`).
 */
export interface ResolverInputs {
  /** Lakebase project id such as `my-app`. Setting it triggers API discovery. */
  project?: string;
  /** Branch id inside the project; the server-marked default is used when omitted. */
  branch?: string;
  /**
   * Lakebase address. Accepted forms: a canonical endpoint / branch /
   * project resource path, a Postgres URI
   * (`postgresql://user@host/db?...`), a bare Lakebase hostname, or a
   * bare project id. The pieces it carries seed the resolver ahead of
   * any REST lookup. Falls back to `LAKEBASE_ENDPOINT` when unset.
   */
  endpoint?: string;
  /** Name of the Postgres database (e.g. `databricks_postgres`). */
  database?: string;
  /** Postgres hostname; derived from the endpoint when not supplied. */
  host?: string;
  /** Postgres port; 5432 when not supplied. */
  port?: number;
  /** TLS mode; `require` when not supplied. */
  sslMode?: SslMode;
  /**
   * Behaviour when the workspace contains no project at all:
   * - `undefined` (default): slugify {@link projectUtils.name} (the
   *   host repo's `package.json` name) to Lakebase id constraints and
   *   create a project with that id.
   * - `string`: create a project with exactly this id.
   * - `false`: do not create anything; throw a descriptive error.
   */
  autoCreate?: string | false;
}
97
+
98
/**
 * Outcome of connection resolution. `port` and `sslMode` are always
 * populated; the remaining fields are optional in the type.
 */
export interface Resolved {
  project?: string;
  branch?: string;
  endpoint?: string;
  database?: string;
  host?: string;
  port: number;
  sslMode: SslMode;
}
108
+
109
/**
 * Shape shared by the Lakebase REST list calls, which follow the
 * Google AIP convention `{ <plural-resource>: T[], next_page_token?: string }`.
 * Only the first page is consumed: `next_page_token` is declared here
 * but never followed, which is acceptable for autopg's
 * "pick something sensible" semantics.
 */
interface ListResponse {
  next_page_token?: string;
  projects?: Project[];
  branches?: Branch[];
  endpoints?: Endpoint[];
  databases?: Database[];
}
122
+
123
/** Lakebase project resource, reduced to the one field this module reads. */
interface Project {
  /** Canonical resource path of the form `projects/{p}`. */
  name?: string;
}
127
+
128
/** Lakebase endpoint resource, reduced to the fields this module reads. */
interface Endpoint {
  /** Canonical resource path: `projects/{p}/branches/{b}/endpoints/{e}`. */
  name?: string;
  uid?: string;
  /**
   * Server-reported state. Connection details are read from here; the
   * spec block describes the desired configuration only and does not
   * carry runtime hostnames.
   */
  status?: {
    endpoint_type?: "ENDPOINT_TYPE_READ_WRITE" | "ENDPOINT_TYPE_READ_ONLY";
    /** Resolved hostnames; `hosts.host` is the writable primary. */
    hosts?: {
      host?: string;
      read_only_host?: string;
    };
    /** Compute state, e.g. `INITIALIZING`, `STARTING`, `READY`, `IDLE`, ... */
    current_state?: string;
  };
}
147
+
148
/** Lakebase branch resource, reduced to the fields this module reads. */
interface Branch {
  /** Canonical resource path: `projects/{p}/branches/{b}`. */
  name?: string;
  status?: {
    /** Set to true on the project's default branch (e.g. `production`). */
    default?: boolean;
    current_state?: string;
  };
}
157
+
158
/** Lakebase database resource, reduced to the fields this module reads. */
interface Database {
  /** Canonical resource path: `projects/{p}/branches/{b}/databases/{d}`. */
  name?: string;
  status?: {
    /**
     * The real Postgres database name, i.e. the value `PGDATABASE`
     * needs. It is not necessarily equal to the resource id: resource
     * `databricks-postgres`, for instance, surfaces as Postgres
     * database `databricks_postgres`.
     */
    postgres_database?: string;
  };
}
170
+
171
/**
 * Envelope of a long-running operation, as returned by mutating REST
 * calls. A truthy `done` marks a terminal state; inspect `error` first
 * and only then read `response`.
 */
interface Operation {
  name?: string;
  done?: boolean;
  error?: unknown;
  response?: unknown;
}
181
+
182
/**
 * Collect resolver inputs from `config`, `process.env`, and the parsed
 * address blob, applying this precedence per field:
 *
 *   `config.<field>` > matching env var > whatever {@link parseAddress}
 *   recovered from the `endpoint` / `LAKEBASE_ENDPOINT` blob.
 *
 * Env vars read: `LAKEBASE_ENDPOINT`, `LAKEBASE_PROJECT`,
 * `LAKEBASE_BRANCH`, `PGDATABASE`, `PGHOST`, `PGPORT`, `PGSSLMODE`.
 *
 * @param config Explicit inputs; any field may be omitted.
 * @returns The layered inputs. No defaults are applied here - `port`
 *   and `sslMode` may still be `undefined`.
 */
export function readInputs(config: ResolverInputs): ResolverInputs {
  const rawAddress = config.endpoint ?? process.env.LAKEBASE_ENDPOINT;
  const parsed = parseAddress(rawAddress);

  // A non-numeric PGPORT makes parseInt return NaN. NaN is not nullish,
  // so it would win the `??` chain below and surface as the resolved
  // port; treat it as "not set" so the next source takes over instead.
  const portEnv = process.env.PGPORT;
  const parsedPortEnv = portEnv ? Number.parseInt(portEnv, 10) : Number.NaN;
  const envPort = Number.isNaN(parsedPortEnv) ? undefined : parsedPortEnv;

  return {
    project: config.project ?? process.env.LAKEBASE_PROJECT ?? parsed.project,
    branch: config.branch ?? process.env.LAKEBASE_BRANCH ?? parsed.branch,
    // Only canonical endpoint resource paths survive here; URIs and
    // bare hostnames set `host` instead and leave `endpoint` undefined
    // until the REST resolver fills it in.
    endpoint: parsed.endpoint,
    database: config.database ?? process.env.PGDATABASE ?? parsed.database,
    host: config.host ?? process.env.PGHOST ?? parsed.host,
    port: config.port ?? envPort ?? parsed.port,
    // PGSSLMODE is passed through as-is; the cast does not validate it.
    sslMode:
      config.sslMode ??
      (process.env.PGSSLMODE as SslMode | undefined) ??
      parsed.sslMode,
    autoCreate: config.autoCreate,
  };
}
213
+
214
/**
 * Build a complete Postgres connection record out of config + env.
 *
 * When `endpoint`, `host`, and `database` are all known after reading
 * the inputs, the record is returned straight away and no REST call is
 * made. Otherwise the missing pieces are looked up through the
 * workspace client, which may create a project (see module docstring).
 *
 * @param config Explicit resolver inputs; env vars fill the gaps.
 * @param log Logger passed through to the lookup helpers.
 * @returns The resolved record; `port` and `sslMode` always carry a value.
 * @throws Whatever the lookup helpers throw (ambiguous or missing
 *   resources, timeouts, REST failures).
 */
export async function resolveConnection(
  config: ResolverInputs,
  log: logUtils.Logger,
): Promise<Resolved> {
  const seed = readInputs(config);
  let project = seed.project;
  let branch = seed.branch;
  let endpoint = seed.endpoint;
  let database = seed.database;
  let host = seed.host;
  const port = seed.port ?? DEFAULT_PORT;
  const sslMode = seed.sslMode ?? DEFAULT_SSL_MODE;

  // A canonical endpoint path already names its project and branch;
  // reuse them for whichever of the two is still unset.
  const endpointParts =
    endpoint && (!project || !branch) ? parseEndpointName(endpoint) : null;
  if (endpointParts) {
    project ??= endpointParts.project;
    branch ??= endpointParts.branch;
  }

  // Same idea for a canonical database path that arrived via
  // PGDATABASE or similar; the database value is reduced to its id.
  // NOTE(review): the resource id can differ from the Postgres database
  // name (see `Database.status.postgres_database`) - confirm that using
  // the id as the database here is intended.
  const databaseParts =
    database && (!project || !branch) ? parseDatabaseName(database) : null;
  if (databaseParts) {
    project ??= databaseParts.project;
    branch ??= databaseParts.branch;
    database = databaseParts.databaseId;
  }

  // Nothing left to look up: return without touching the network.
  if (endpoint && host && database) {
    return { project, branch, endpoint, database, host, port, sslMode };
  }

  const ws = getWorkspaceClient({});

  // Only a host is known: search the workspace for the endpoint that
  // publishes it, so LAKEBASE_ENDPOINT can be populated.
  if (!project && host) {
    const owner = await findEndpointByHost(ws, host, log);
    if (owner) {
      project = owner.project;
      branch = owner.branch;
      endpoint ??= owner.endpoint;
    }
  }

  // Still no project from config, env, or address: list, pick, or create.
  if (!project) {
    project = await pickOrCreateProject(ws, config.autoCreate, log);
  }

  if (!branch) {
    branch = await pickBranch(ws, project, log);
  }

  if (!endpoint) {
    const picked = await pickEndpoint(ws, project, branch, log);
    endpoint = picked.name;
    host ??= picked.host;
  }

  // The endpoint is known but its hostname is not: wait for the
  // endpoint to report a usable state and read the host from it.
  if (!host && endpoint) {
    const parts = parseEndpointName(endpoint);
    if (parts) {
      const ready = await waitEndpointReady(
        ws,
        parts.project,
        parts.branch,
        parts.endpointId,
        log,
      );
      host = ready.status?.hosts?.host;
      log.info("autopg: resolved host from endpoint", { host });
    }
  }

  if (!database) {
    database = await pickDatabase(ws, project, branch, log);
  }

  return { project, branch, endpoint, database, host, port, sslMode };
}
302
+
303
/**
 * Write resolved values back to `process.env` so the `lakebase` plugin
 * (which reads env directly) picks them up during its own `setup()`.
 *
 * Existing env values are preserved and only missing keys are filled
 * in, which keeps explicit overrides authoritative. The one exception
 * is `LAKEBASE_ENDPOINT`: this module accepts URIs, bare hostnames, and
 * bare project ids in that variable, so an existing value that is not a
 * canonical `projects/{p}/branches/{b}/endpoints/{e}` path is replaced
 * by the resolved endpoint path. Without that, the reverse lookup done
 * to populate `LAKEBASE_ENDPOINT` would never reach the environment.
 *
 * @param resolved Connection record produced by the resolver.
 */
export function applyToEnv(resolved: Resolved): void {
  if (resolved.endpoint) {
    const current = process.env.LAKEBASE_ENDPOINT;
    // Keep a canonical endpoint path the user set explicitly; overwrite
    // anything else (unset, empty, URI, hostname, project id).
    if (current === undefined || !ENDPOINT_NAME_RE.test(current)) {
      process.env.LAKEBASE_ENDPOINT = resolved.endpoint;
    }
  }
  if (resolved.host) process.env.PGHOST ??= resolved.host;
  if (resolved.database) process.env.PGDATABASE ??= resolved.database;
  process.env.PGPORT ??= String(resolved.port);
  process.env.PGSSLMODE ??= resolved.sslMode;
  if (resolved.project) process.env.LAKEBASE_PROJECT ??= resolved.project;
  if (resolved.branch) process.env.LAKEBASE_BRANCH ??= resolved.branch;
}
318
+
319
/**
 * Split a canonical endpoint path
 * (`projects/{p}/branches/{b}/endpoints/{e}`) into its three ids.
 *
 * @returns The ids, or `null` when `endpoint` is not in that form.
 */
export function parseEndpointName(
  endpoint: string,
): { project: string; branch: string; endpointId: string } | null {
  const match = endpoint.match(ENDPOINT_NAME_RE);
  if (match === null) return null;
  const [, project, branch, endpointId] = match;
  return { project: project!, branch: branch!, endpointId: endpointId! };
}
327
+
328
/**
 * Split a canonical database path
 * (`projects/{p}/branches/{b}/databases/{d}`) into its three ids.
 *
 * @returns The ids, or `null` when `database` is not in that form.
 */
export function parseDatabaseName(
  database: string,
): { project: string; branch: string; databaseId: string } | null {
  const match = database.match(DATABASE_NAME_RE);
  if (match === null) return null;
  const [, project, branch, databaseId] = match;
  return { project: project!, branch: branch!, databaseId: databaseId! };
}
336
+
337
/**
 * Return the `{b}` segment of a `projects/{p}/branches/{b}` path, or
 * `undefined` when `name` is missing/empty or not in that form.
 */
function branchIdFromName(name: string | undefined): string | undefined {
  return name ? name.match(BRANCH_NAME_RE)?.[2] : undefined;
}
343
+
344
/**
 * Return the `{p}` segment of a `projects/{p}` path, or `undefined`
 * when `name` is missing/empty or not in that form.
 */
function projectIdFromName(name: string | undefined): string | undefined {
  return name ? name.match(PROJECT_NAME_RE)?.[1] : undefined;
}
350
+
351
/** Client type, derived from what `getWorkspaceClient` returns. */
type WorkspaceClient = ReturnType<typeof getWorkspaceClient>;
352
+
353
/**
 * Issue a GET through `ws.apiClient` asking for JSON and return the
 * result typed as `T`. The cast is unchecked; no shape validation
 * happens here.
 */
async function getJson<T>(ws: WorkspaceClient, path: string): Promise<T> {
  const headers = new Headers({ Accept: "application/json" });
  const body = await ws.apiClient.request({
    method: "GET",
    path,
    headers,
    raw: false,
  });
  return body as T;
}
363
+
364
/**
 * Issue a POST through `ws.apiClient` for create / mutate calls and
 * return the result typed as `T` (unchecked cast).
 *
 * @param body Sent as the request `payload`.
 * @param query Optional query-string parameters.
 */
async function postJson<T>(
  ws: WorkspaceClient,
  path: string,
  body: unknown,
  query?: Record<string, string>,
): Promise<T> {
  const headers = new Headers();
  headers.set("Accept", "application/json");
  headers.set("Content-Type", "application/json");
  const parsed = await ws.apiClient.request({
    method: "POST",
    path,
    query,
    headers,
    raw: false,
    payload: body,
  });
  return parsed as T;
}
384
+
385
/** List the workspace's projects (first page only); `[]` when the field is absent. */
async function listProjects(ws: WorkspaceClient): Promise<Project[]> {
  const url = [API_BASE, "projects"].join("/");
  const page = await getJson<ListResponse>(ws, url);
  return page.projects ?? [];
}
389
+
390
/** List the branches of `project` (first page only); `[]` when the field is absent. */
async function listBranches(ws: WorkspaceClient, project: string): Promise<Branch[]> {
  const url = [API_BASE, "projects", project, "branches"].join("/");
  const page = await getJson<ListResponse>(ws, url);
  return page.branches ?? [];
}
397
+
398
/** List the endpoints of a (project, branch) pair (first page only); `[]` when the field is absent. */
async function listEndpoints(
  ws: WorkspaceClient,
  project: string,
  branch: string,
): Promise<Endpoint[]> {
  const url = [API_BASE, "projects", project, "branches", branch, "endpoints"].join("/");
  const page = await getJson<ListResponse>(ws, url);
  return page.endpoints ?? [];
}
409
+
410
/** List the databases of a (project, branch) pair (first page only); `[]` when the field is absent. */
async function listDatabases(
  ws: WorkspaceClient,
  project: string,
  branch: string,
): Promise<Database[]> {
  const url = [API_BASE, "projects", project, "branches", branch, "databases"].join("/");
  const page = await getJson<ListResponse>(ws, url);
  return page.databases ?? [];
}
421
+
422
/** Fetch one endpoint resource by its project, branch, and endpoint ids. */
async function getEndpoint(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  endpointId: string,
): Promise<Endpoint> {
  const url = [
    API_BASE,
    "projects",
    project,
    "branches",
    branch,
    "endpoints",
    endpointId,
  ].join("/");
  return getJson<Endpoint>(ws, url);
}
433
+
434
/**
 * Walk projects -> branches -> endpoints looking for an endpoint whose
 * `status.hosts.host` equals `host` exactly, and report which
 * project/branch/endpoint owns it. This recovers the resource path when
 * the caller supplied only a hostname (e.g. through a Postgres URI).
 *
 * Listings are fetched one after another and only their first page is
 * read; this is a best-effort fallback, so pagination is intentionally
 * not followed. The search stops at the first branch whose listing
 * contains a matching, named endpoint.
 *
 * @returns Owning ids plus the endpoint's full resource path, or `null`
 *   when nothing matched.
 */
async function findEndpointByHost(
  ws: WorkspaceClient,
  host: string,
  log: logUtils.Logger,
): Promise<{ project: string; branch: string; endpoint: string } | null> {
  for (const { name: projectName } of await listProjects(ws)) {
    const project = projectIdFromName(projectName);
    if (!project) continue;

    for (const { name: branchName } of await listBranches(ws, project)) {
      const branch = branchIdFromName(branchName);
      if (!branch) continue;

      const listed = await listEndpoints(ws, project, branch);
      const endpoint = listed.find((c) => c.status?.hosts?.host === host)?.name;
      if (!endpoint) continue;

      log.info("autopg: matched endpoint by host", { host, endpoint });
      return { project, branch, endpoint };
    }
  }
  log.debug("autopg: no endpoint matched host", { host });
  return null;
}
474
+
475
/**
 * Pick the project to use, or create one when the workspace is empty.
 *
 * Selection order:
 *   1. Zero projects AND `autoCreate !== false` -> ensure a project with
 *      the resolved id exists, then return its id.
 *   2. Zero projects AND `autoCreate === false` -> throw.
 *   3. Exactly one project listed -> use it; throw if its resource name
 *      cannot be parsed into a project id.
 *   4. Multiple projects -> throw with the candidate list (set
 *      `LAKEBASE_PROJECT` to disambiguate).
 *
 * @param autoCreate Explicit project id, `false` to forbid creation, or
 *   `undefined` to derive an id via `defaultProjectId`.
 * @returns The id of the chosen or created project.
 * @throws Error in cases 2 and 4, and for an unparseable single project.
 */
async function pickOrCreateProject(
  ws: WorkspaceClient,
  autoCreate: string | false | undefined,
  log: logUtils.Logger,
): Promise<string> {
  const projects = await listProjects(ws);

  if (projects.length === 0) {
    if (autoCreate === false) {
      throw new Error(
        "autopg: no Lakebase projects found and `autoCreate: false`; create a project or set LAKEBASE_PROJECT",
      );
    }
    const id = autoCreate ?? (await defaultProjectId());
    return ensureProject(ws, id, log);
  }

  const ids = projects
    .map((p) => projectIdFromName(p.name))
    .filter((id): id is string => Boolean(id));

  if (projects.length === 1) {
    const only = ids[0];
    if (only) {
      log.info("autopg: using only project", { project: only });
      return only;
    }
    // One project exists but its name is missing or malformed. Reporting
    // "multiple projects" with an empty candidate list would mislead.
    throw new Error(
      `autopg: found one project but could not parse its resource name '${projects[0]?.name ?? ""}'; set LAKEBASE_PROJECT or config.project`,
    );
  }

  throw new Error(
    `autopg: multiple projects found; set LAKEBASE_PROJECT or config.project. Candidates: ${ids.join(", ")}`,
  );
}
516
+
517
/**
 * Derive a Lakebase project id from the host repo's `package.json`
 * name (via {@link projectUtils.name}) slugified to satisfy the
 * Lakebase id constraint (`^[a-z][a-z0-9-]{0,61}[a-z0-9]$`).
 *
 * Trailing hyphens (which truncation to the length cap could leave
 * behind) are stripped, then the result is checked against the whole
 * constraint rather than just its first character, so an id the server
 * would reject fails here with an actionable message.
 *
 * @returns A project id matching the constraint above.
 * @throws Error when no valid id can be derived; callers should pass an
 *   explicit `autoCreate` id in that case.
 */
async function defaultProjectId(): Promise<string> {
  const name = await projectUtils.name();
  const slug = stringUtils.toSlugWithOptions({ maxLength: PROJECT_ID_MAX_LEN }, name);
  // The id must end in a letter or digit; drop any dangling hyphens.
  const candidate = slug ? slug.replace(/-+$/, "") : slug;
  if (!candidate || !/^[a-z][a-z0-9-]{0,61}[a-z0-9]$/.test(candidate)) {
    throw new Error(
      `autopg: could not derive a Lakebase project id from project name '${name}'; pass autoCreate explicitly`,
    );
  }
  return candidate;
}
536
+
537
/**
 * Make sure a Lakebase project with id `projectId` exists: POST a
 * create request and wait for the resulting operation to finish.
 *
 * Any error thrown by the POST or by the wait that
 * `isAlreadyExistsError` recognises is treated as success - another
 * process (a concurrent boot, a sibling) won the race and the project
 * is there for the downstream pickers. Every other error is rethrown.
 *
 * Project creation typically provisions a default `production` branch
 * alongside; downstream pickers handle the rest.
 *
 * @returns `projectId`, unchanged.
 */
async function ensureProject(
  ws: WorkspaceClient,
  projectId: string,
  log: logUtils.Logger,
): Promise<string> {
  log.warn("autopg: no projects found; creating", { project: projectId });

  const createBody = { spec: { pg_version: DEFAULT_PG_VERSION } };
  const createQuery = { project_id: projectId };
  try {
    const pending = await postJson<Operation>(
      ws,
      `${API_BASE}/projects`,
      createBody,
      createQuery,
    );
    await waitForOperation(ws, pending, log);
    log.info("autopg: created project", { project: projectId });
  } catch (err) {
    if (isAlreadyExistsError(err)) {
      log.info("autopg: project already exists (race); proceeding", {
        project: projectId,
      });
    } else {
      throw err;
    }
  }
  return projectId;
}
570
+
571
/**
 * Decide whether `error` is one of the Databricks SDK's
 * `ALREADY_EXISTS` shapes, so that losing a creation race in
 * `ensureProject` is a no-op rather than a failure.
 *
 * The SDK throws `ApiError { errorCode, statusCode }` for structured
 * server errors and `HttpError { code }` for transport-layer 4xx/5xx.
 * Checks, in order: `statusCode` or `code` equal to 409; `errorCode`
 * matching "already exists" (case-insensitive, at most one character
 * between the words); and, as a forward-compatible fallback, the same
 * match on `message`.
 *
 * @returns `true` on any match; `false` otherwise, including for
 *   non-object values.
 */
function isAlreadyExistsError(error: unknown): boolean {
  if (typeof error !== "object" || error === null) return false;

  const candidate = error as {
    statusCode?: number;
    code?: number;
    errorCode?: string;
    message?: string;
  };
  const pattern = /already.?exists/i;
  const mentions = (text: string | undefined): boolean =>
    text ? pattern.test(text) : false;

  return (
    candidate.statusCode === 409 ||
    candidate.code === 409 ||
    mentions(candidate.errorCode) ||
    mentions(candidate.message)
  );
}
593
+
594
/**
 * Wait for a Lakebase long-running operation to reach `done: true` and
 * return the terminal envelope.
 *
 * An operation that is already done is returned without polling.
 * Otherwise the loop sleeps `OPERATION_POLL_MS`, re-fetches the
 * operation at `${API_BASE}/${op.name}`, and repeats until the
 * operation is done or `OPERATION_TIMEOUT_MS` has elapsed.
 *
 * @throws Error when
 *   - the terminal envelope carries an `error` field;
 *   - `op.name` is missing (nothing to poll);
 *   - the timeout elapses before `done: true`.
 */
async function waitForOperation(
  ws: WorkspaceClient,
  op: Operation,
  log: logUtils.Logger,
): Promise<Operation> {
  if (op.done) {
    if (!op.error) return op;
    throw new Error(`autopg: operation failed: ${JSON.stringify(op.error)}`);
  }

  const opName = op.name;
  if (!opName) {
    throw new Error("autopg: operation response has no name to poll");
  }

  const deadline = Date.now() + OPERATION_TIMEOUT_MS;
  while (Date.now() < deadline) {
    // Sleep first: the envelope passed in was already known to be pending.
    await sleep(OPERATION_POLL_MS);
    const polled = await getJson<Operation>(ws, `${API_BASE}/${opName}`);
    log.debug("autopg: operation status", { op: opName, done: polled.done });
    if (!polled.done) continue;
    if (polled.error) {
      throw new Error(
        `autopg: operation '${opName}' failed: ${JSON.stringify(polled.error)}`,
      );
    }
    return polled;
  }

  throw new Error(
    `autopg: operation '${opName}' did not complete within ${OPERATION_TIMEOUT_MS}ms`,
  );
}
636
+
637
/**
 * Re-fetch an endpoint until it is usable, then return that payload.
 *
 * Usable means either
 *   - `status.current_state` is `READY` or `IDLE` (`IDLE` just means
 *     the compute has scaled to zero but a connection will wake it), or
 *   - a hostname is already published and the state is anything other
 *     than `INITIALIZING` (STARTING, etc.) - good enough to connect;
 *     lakebase's OAuth token request will wake it.
 *
 * Polls every `ENDPOINT_READY_POLL_MS` for up to
 * `ENDPOINT_READY_TIMEOUT_MS`.
 *
 * @throws Error naming the last observed state when the timeout elapses.
 */
async function waitEndpointReady(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  endpointId: string,
  log: logUtils.Logger,
): Promise<Endpoint> {
  const deadline = Date.now() + ENDPOINT_READY_TIMEOUT_MS;
  let last: Endpoint | null = null;

  while (Date.now() < deadline) {
    last = await getEndpoint(ws, project, branch, endpointId);
    const state = last.status?.current_state;
    const settled = state === "READY" || state === "IDLE";
    const hostPublished =
      Boolean(last.status?.hosts?.host) && state !== "INITIALIZING";
    if (settled || hostPublished) return last;

    log.debug("autopg: waiting for endpoint", { endpointId, state });
    await sleep(ENDPOINT_READY_POLL_MS);
  }

  throw new Error(
    `autopg: endpoint '${endpointId}' under projects/${project}/branches/${branch} did not become ready within ${ENDPOINT_READY_TIMEOUT_MS}ms (last state: ${last?.status?.current_state ?? "unknown"})`,
  );
}
669
+
670
/**
 * Choose the branch to use for `project`.
 *
 * The branch flagged `status.default: true` wins (the server-side
 * default, typically `production` unless the project owner changed
 * it). If no branch id can be derived from a flagged branch and the
 * project has exactly one branch, that branch is used.
 *
 * @returns The chosen branch id.
 * @throws Error when the project has no branches, or when neither rule
 *   yields a branch id (the message lists the candidates).
 */
async function pickBranch(
  ws: WorkspaceClient,
  project: string,
  log: logUtils.Logger,
): Promise<string> {
  const branches = await listBranches(ws, project);
  if (branches.length === 0) {
    throw new Error(
      `autopg: project '${project}' has no branches; cannot resolve a default`,
    );
  }

  const defaultBranch = branches.find((b) => b.status?.default === true);
  let chosen = branchIdFromName(defaultBranch?.name);
  if (chosen === undefined && branches.length === 1) {
    chosen = branchIdFromName(branches[0]!.name);
  }

  if (chosen) {
    log.info("autopg: resolved branch", { project, branch: chosen });
    return chosen;
  }

  const candidates = branches
    .map((b) => branchIdFromName(b.name))
    .filter((id): id is string => Boolean(id))
    .join(", ");
  throw new Error(
    `autopg: project '${project}' has multiple branches and none marked default; set LAKEBASE_BRANCH or config.branch. Candidates: ${candidates}`,
  );
}
703
+
704
/**
 * Choose the primary endpoint of a (project, branch) pair.
 *
 * The first endpoint with
 * `status.endpoint_type === "ENDPOINT_TYPE_READ_WRITE"` wins; failing
 * that, the sole endpoint when exactly one is listed. Both the resource
 * name and the published host (possibly `undefined`) are returned so
 * the caller can fill `LAKEBASE_ENDPOINT` and `PGHOST` from one call.
 *
 * @throws Error when the branch has no endpoints, or when the chosen
 *   endpoint is absent or unnamed (the message lists the candidates).
 */
async function pickEndpoint(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  log: logUtils.Logger,
): Promise<{ name: string; host?: string }> {
  const endpoints = await listEndpoints(ws, project, branch);
  if (endpoints.length === 0) {
    throw new Error(
      `autopg: branch 'projects/${project}/branches/${branch}' has no endpoints; cannot resolve LAKEBASE_ENDPOINT`,
    );
  }

  const readWrite = endpoints.find(
    (e) => e.status?.endpoint_type === "ENDPOINT_TYPE_READ_WRITE",
  );
  const sole = endpoints.length === 1 ? endpoints[0] : undefined;
  const primary = readWrite ?? sole;

  if (primary?.name) {
    const host = primary.status?.hosts?.host;
    log.info("autopg: resolved endpoint", { endpoint: primary.name, host });
    return { name: primary.name, host };
  }

  const names = endpoints.map((e) => e.name).filter(Boolean);
  throw new Error(
    `autopg: branch has no primary READ_WRITE endpoint; set LAKEBASE_ENDPOINT or config.endpoint. Candidates: ${names.join(", ")}`,
  );
}
736
+
737
/**
 * Choose the Postgres database for a (project, branch) pair.
 *
 * Works on the Postgres database NAME (`status.postgres_database`),
 * which is what `PGDATABASE` needs and which can differ from the
 * resource id (e.g. resource `databricks-postgres` surfaces as
 * database `databricks_postgres`). `databricks_postgres` (the Lakebase
 * default) is preferred; otherwise the only named database is used.
 *
 * @returns The chosen Postgres database name.
 * @throws Error when the branch lists no databases, or when neither
 *   rule yields a name (the message lists the candidates).
 */
async function pickDatabase(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  log: logUtils.Logger,
): Promise<string> {
  const databases = await listDatabases(ws, project, branch);
  if (databases.length === 0) {
    throw new Error(
      `autopg: branch 'projects/${project}/branches/${branch}' has no databases; cannot resolve PGDATABASE`,
    );
  }

  // Databases that do not report a Postgres name are ignored.
  const names = databases
    .map((d) => d.status?.postgres_database)
    .filter((n): n is string => Boolean(n));

  let chosen: string | undefined;
  if (names.includes("databricks_postgres")) {
    chosen = "databricks_postgres";
  } else if (names.length === 1) {
    chosen = names[0];
  }

  if (chosen) {
    log.info("autopg: resolved database", { database: chosen });
    return chosen;
  }

  throw new Error(
    `autopg: multiple databases and no 'databricks_postgres'; set PGDATABASE or config.database. Candidates: ${names.join(", ")}`,
  );
}