@dbx-tools/appkit-autopg 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +18 -0
- package/dist/src/address.d.ts +69 -0
- package/dist/src/address.js +132 -0
- package/dist/src/autopg.d.ts +59 -0
- package/dist/src/autopg.js +74 -0
- package/dist/src/resolver.d.ts +118 -0
- package/dist/src/resolver.js +507 -0
- package/index.ts +18 -0
- package/package.json +40 -0
- package/src/address.ts +148 -0
- package/src/autopg.ts +93 -0
- package/src/resolver.ts +771 -0
package/src/resolver.ts
ADDED
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lakebase Postgres connection resolver.
|
|
3
|
+
*
|
|
4
|
+
* Reads the same env vars the `lakebase` plugin consumes (`PGHOST`,
|
|
5
|
+
* `PGDATABASE`, `PGPORT`, `PGSSLMODE`, `LAKEBASE_ENDPOINT`) and fills in
|
|
6
|
+
* whichever pieces are missing using the Lakebase Autoscaling REST API
|
|
7
|
+
* under `/api/2.0/postgres/` via the Databricks workspace client.
|
|
8
|
+
*
|
|
9
|
+
* `LAKEBASE_ENDPOINT` (and `config.endpoint`) accept anything
|
|
10
|
+
* {@link parseAddress} understands - canonical resource paths, Postgres
|
|
11
|
+
* URIs, bare hostnames, or bare project ids. The resolver layers
|
|
12
|
+
* whatever pieces fall out of parsing under explicit config / env
|
|
13
|
+
* values, then fills the remaining gaps via the API:
|
|
14
|
+
*
|
|
15
|
+
* 1. Reverse-lookup: when a host is known but no resource path is,
|
|
16
|
+
* scan projects -> branches -> endpoints for a matching
|
|
17
|
+
* `status.hosts.host` and recover the owning project/branch/endpoint.
|
|
18
|
+
* 2. Pick: when a project is known but child resources aren't, prefer
|
|
19
|
+
* the server-side default (`status.default`, `ENDPOINT_TYPE_READ_WRITE`,
|
|
20
|
+
* `databricks_postgres`) and fall back to "the only one" when a
|
|
21
|
+
* listing returns a single result.
|
|
22
|
+
* 3. Auto-create: when no projects exist at all, create one whose
|
|
23
|
+
* id defaults to `projectUtils.name()` slugified (override
|
|
24
|
+
* with `config.autoCreate: "my-id"` or disable with
|
|
25
|
+
* `config.autoCreate: false`). The create call is idempotent - an
|
|
26
|
+
* `ALREADY_EXISTS` response from a concurrent boot is treated as
|
|
27
|
+
* success. Then poll the default endpoint until it reports
|
|
28
|
+
* `current_state` `READY` or `IDLE`.
|
|
29
|
+
*
|
|
30
|
+
* The {@link autopg} helper then writes the resolved values back to
|
|
31
|
+
* `process.env` so the downstream `lakebase` plugin picks them up.
|
|
32
|
+
*
|
|
33
|
+
* @see https://docs.databricks.com/api/workspace/postgres
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { getWorkspaceClient } from "@databricks/appkit";
|
|
37
|
+
import { projectUtils, stringUtils, type logUtils } from "@dbx-tools/appkit-shared";
|
|
38
|
+
import { setTimeout as sleep } from "node:timers/promises";
|
|
39
|
+
|
|
40
|
+
import { parseAddress } from "./address.js";
|
|
41
|
+
|
|
42
|
+
const API_BASE = "/api/2.0/postgres";
|
|
43
|
+
const DEFAULT_PORT = 5432;
|
|
44
|
+
const DEFAULT_SSL_MODE: SslMode = "require";
|
|
45
|
+
const DEFAULT_PG_VERSION = 17;
|
|
46
|
+
/** Lakebase project ids: `^[a-z][a-z0-9-]{0,61}[a-z0-9]$`. */
|
|
47
|
+
const PROJECT_ID_MAX_LEN = 63;
|
|
48
|
+
const OPERATION_TIMEOUT_MS = 5 * 60_000;
|
|
49
|
+
const OPERATION_POLL_MS = 2_000;
|
|
50
|
+
const ENDPOINT_READY_TIMEOUT_MS = 5 * 60_000;
|
|
51
|
+
const ENDPOINT_READY_POLL_MS = 2_000;
|
|
52
|
+
|
|
53
|
+
const ENDPOINT_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)\/endpoints\/([^/]+)$/;
|
|
54
|
+
const DATABASE_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)\/databases\/([^/]+)$/;
|
|
55
|
+
const BRANCH_NAME_RE = /^projects\/([^/]+)\/branches\/([^/]+)$/;
|
|
56
|
+
const PROJECT_NAME_RE = /^projects\/([^/]+)$/;
|
|
57
|
+
|
|
58
|
+
/** Postgres TLS mode passed through to `pg`. */
|
|
59
|
+
export type SslMode = "require" | "disable" | "prefer";
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* User-supplied inputs (config or env) before any API resolution. Every
|
|
63
|
+
* field is optional - the resolver tries to fill in missing pieces from
|
|
64
|
+
* the Lakebase API when it has enough context (typically a `project`).
|
|
65
|
+
*/
|
|
66
|
+
export interface ResolverInputs {
|
|
67
|
+
/** Lakebase project id, e.g. `my-app`. Triggers API discovery when set. */
|
|
68
|
+
project?: string;
|
|
69
|
+
/** Branch id within the project. Defaults to the server-marked default. */
|
|
70
|
+
branch?: string;
|
|
71
|
+
/**
|
|
72
|
+
* Lakebase address - accepts a canonical endpoint/branch/project
|
|
73
|
+
* resource path, a Postgres URI (`postgresql://user@host/db?...`),
|
|
74
|
+
* a bare Lakebase hostname, or a bare project id. Whatever pieces it
|
|
75
|
+
* carries seed the resolver before REST lookups happen. Reads from
|
|
76
|
+
* `LAKEBASE_ENDPOINT` when not set.
|
|
77
|
+
*/
|
|
78
|
+
endpoint?: string;
|
|
79
|
+
/** Postgres database name (e.g. `databricks_postgres`). */
|
|
80
|
+
database?: string;
|
|
81
|
+
/** Postgres hostname; auto-derived from the endpoint when missing. */
|
|
82
|
+
host?: string;
|
|
83
|
+
/** Postgres port. Defaults to 5432. */
|
|
84
|
+
port?: number;
|
|
85
|
+
/** TLS mode. Defaults to `require`. */
|
|
86
|
+
sslMode?: SslMode;
|
|
87
|
+
/**
|
|
88
|
+
* What to do when no project exists in the workspace at all.
|
|
89
|
+
* - `undefined` (default): derive a project id from
|
|
90
|
+
* {@link projectUtils.name} (the host repo's `package.json`
|
|
91
|
+
* name) slugified to Lakebase id constraints, then create it.
|
|
92
|
+
* - `string`: create a new project with this exact id.
|
|
93
|
+
* - `false`: skip creation and throw with a clear error message.
|
|
94
|
+
*/
|
|
95
|
+
autoCreate?: string | false;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/** Fully-resolved connection. `port` and `sslMode` always have a value. */
|
|
99
|
+
export interface Resolved {
|
|
100
|
+
project?: string;
|
|
101
|
+
branch?: string;
|
|
102
|
+
endpoint?: string;
|
|
103
|
+
database?: string;
|
|
104
|
+
host?: string;
|
|
105
|
+
port: number;
|
|
106
|
+
sslMode: SslMode;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Lakebase REST list responses follow the Google AIP convention:
|
|
111
|
+
* `{ <plural-resource>: T[], next_page_token?: string }`. We only read
|
|
112
|
+
* the first page; for autopg's "pick something sensible" semantics the
|
|
113
|
+
* cap is fine.
|
|
114
|
+
*/
|
|
115
|
+
interface ListResponse {
|
|
116
|
+
next_page_token?: string;
|
|
117
|
+
projects?: Project[];
|
|
118
|
+
branches?: Branch[];
|
|
119
|
+
endpoints?: Endpoint[];
|
|
120
|
+
databases?: Database[];
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
interface Project {
|
|
124
|
+
/** Full resource path: `projects/{p}`. */
|
|
125
|
+
name?: string;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
interface Endpoint {
|
|
129
|
+
/** Full resource path: `projects/{p}/branches/{b}/endpoints/{e}`. */
|
|
130
|
+
name?: string;
|
|
131
|
+
uid?: string;
|
|
132
|
+
/**
|
|
133
|
+
* Server-side state. All connection info lives here - the spec block
|
|
134
|
+
* only carries the desired configuration, not the runtime hostnames.
|
|
135
|
+
*/
|
|
136
|
+
status?: {
|
|
137
|
+
endpoint_type?: "ENDPOINT_TYPE_READ_WRITE" | "ENDPOINT_TYPE_READ_ONLY";
|
|
138
|
+
/** Resolved hostnames; `hosts.host` is the writable primary. */
|
|
139
|
+
hosts?: {
|
|
140
|
+
host?: string;
|
|
141
|
+
read_only_host?: string;
|
|
142
|
+
};
|
|
143
|
+
/** Compute state: `INITIALIZING`, `STARTING`, `READY`, `IDLE`, ... */
|
|
144
|
+
current_state?: string;
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
interface Branch {
|
|
149
|
+
/** Full resource path: `projects/{p}/branches/{b}`. */
|
|
150
|
+
name?: string;
|
|
151
|
+
status?: {
|
|
152
|
+
/** True for the project's default branch (e.g. `production`). */
|
|
153
|
+
default?: boolean;
|
|
154
|
+
current_state?: string;
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
interface Database {
|
|
159
|
+
/** Full resource path: `projects/{p}/branches/{b}/databases/{d}`. */
|
|
160
|
+
name?: string;
|
|
161
|
+
status?: {
|
|
162
|
+
/**
|
|
163
|
+
* Actual Postgres database name (used as `PGDATABASE`). May differ
|
|
164
|
+
* from the resource id - e.g. resource `databricks-postgres`
|
|
165
|
+
* surfaces as Postgres database `databricks_postgres`.
|
|
166
|
+
*/
|
|
167
|
+
postgres_database?: string;
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Long-running operation envelope returned by mutating REST calls.
|
|
173
|
+
* `done: true` means terminal; check `error` before reading `response`.
|
|
174
|
+
*/
|
|
175
|
+
interface Operation {
|
|
176
|
+
name?: string;
|
|
177
|
+
done?: boolean;
|
|
178
|
+
error?: unknown;
|
|
179
|
+
response?: unknown;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
 * Collect resolver inputs from explicit config, `process.env`, and the
 * parsed address blob, with this precedence per field:
 *
 *   `config.<field>` > matching env var > whatever {@link parseAddress}
 *   recovered from `config.endpoint` / `LAKEBASE_ENDPOINT`.
 *
 * `PGPORT` is only honoured when it parses to a number; a non-numeric
 * value is ignored so the address-derived port (or the caller's default)
 * wins instead of `NaN` leaking into the result.
 *
 * @param config Explicit, caller-supplied inputs.
 * @returns The merged inputs. Fields nobody supplied stay `undefined`.
 */
export function readInputs(config: ResolverInputs): ResolverInputs {
  const env = process.env;
  const parsed = parseAddress(config.endpoint ?? env.LAKEBASE_ENDPOINT);

  // Empty PGPORT counts as unset; garbage (NaN) is discarded rather than
  // passed through, because `NaN ?? fallback` would otherwise keep NaN.
  const parsedEnvPort = env.PGPORT ? Number.parseInt(env.PGPORT, 10) : undefined;
  const envPort =
    parsedEnvPort !== undefined && Number.isNaN(parsedEnvPort) ? undefined : parsedEnvPort;

  return {
    project: config.project ?? env.LAKEBASE_PROJECT ?? parsed.project,
    branch: config.branch ?? env.LAKEBASE_BRANCH ?? parsed.branch,
    // Taken from the parsed address only. Presumably parseAddress sets
    // `endpoint` just for canonical resource paths and reports URIs and
    // bare hostnames through `host` - confirm against ./address.js.
    endpoint: parsed.endpoint,
    database: config.database ?? env.PGDATABASE ?? parsed.database,
    host: config.host ?? env.PGHOST ?? parsed.host,
    port: config.port ?? envPort ?? parsed.port,
    // NOTE(review): PGSSLMODE is cast, not validated; values outside
    // SslMode pass through unchanged.
    sslMode: config.sslMode ?? (env.PGSSLMODE as SslMode | undefined) ?? parsed.sslMode,
    autoCreate: config.autoCreate,
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Resolve a Postgres connection record from config + env, calling the
 * Lakebase REST API only for the pieces that are still missing.
 *
 * Order of work:
 *  1. Merge config / env / address via `readInputs`; default `port` and
 *     `sslMode`.
 *  2. Harvest project / branch from a canonical endpoint or database
 *     resource path.
 *  3. Return without any network traffic when `endpoint`, `host` and
 *     `database` are all known.
 *  4. Otherwise: reverse-lookup the endpoint by host, pick or create a
 *     project, pick a branch, pick an endpoint, wait for the endpoint to
 *     publish a host, and pick a database.
 *
 * @param config Explicit inputs; env vars fill whatever is left unset.
 * @param log Logger that receives progress messages.
 * @returns The resolved connection; `port` and `sslMode` always have a value.
 * @throws Error when one of the pick / create / wait steps cannot settle
 *   on a single answer (those errors propagate unchanged).
 */
export async function resolveConnection(
  config: ResolverInputs,
  log: logUtils.Logger,
): Promise<Resolved> {
  const inputs = readInputs(config);
  let { project, branch, endpoint, database, host } = inputs;
  const port = inputs.port ?? DEFAULT_PORT;
  const sslMode = inputs.sslMode ?? DEFAULT_SSL_MODE;

  // A canonical endpoint path carries its project and branch; use them
  // only to fill gaps, never to override explicit values.
  const fromEndpoint = endpoint ? parseEndpointName(endpoint) : null;
  if (fromEndpoint) {
    project ??= fromEndpoint.project;
    branch ??= fromEndpoint.branch;
  }

  // Same for a canonical database path (e.g. supplied via PGDATABASE).
  // The path is always reduced to its id, even when project and branch
  // are already known, so a full resource path is never returned as the
  // database name.
  // NOTE(review): this is the database *resource id*; the Postgres
  // database name may differ from it - confirm whether a lookup is needed.
  const fromDatabase = database ? parseDatabaseName(database) : null;
  if (fromDatabase) {
    project ??= fromDatabase.project;
    branch ??= fromDatabase.branch;
    database = fromDatabase.databaseId;
  }

  // Already complete: skip every REST call.
  if (endpoint && host && database) {
    return { project, branch, endpoint, database, host, port, sslMode };
  }

  const ws = getWorkspaceClient({});

  // Host known but no project: scan the workspace for the endpoint that
  // owns this host. A match overrides `branch` and fills `endpoint`.
  if (!project && host) {
    const found = await findEndpointByHost(ws, host, log);
    if (found) {
      project = found.project;
      branch = found.branch;
      endpoint ??= found.endpoint;
    }
  }

  // No project anywhere in config/env/address: list, pick, or create.
  if (!project) {
    project = await pickOrCreateProject(ws, config.autoCreate, log);
  }

  if (!branch) {
    branch = await pickBranch(ws, project, log);
  }

  if (!endpoint) {
    const picked = await pickEndpoint(ws, project, branch, log);
    endpoint = picked.name;
    host ??= picked.host;
  }

  // Endpoint known but no host yet: poll the endpoint until it is usable
  // and read the host from its status.
  if (!host && endpoint) {
    const target = parseEndpointName(endpoint);
    if (target) {
      const ready = await waitEndpointReady(
        ws,
        target.project,
        target.branch,
        target.endpointId,
        log,
      );
      host = ready.status?.hosts?.host;
      log.info("autopg: resolved host from endpoint", { host });
    }
  }

  if (!database) {
    database = await pickDatabase(ws, project, branch, log);
  }

  return { project, branch, endpoint, database, host, port, sslMode };
}
|
|
302
|
+
|
|
303
|
+
/**
 * Publish a resolved connection into `process.env`.
 *
 * Every assignment uses `??=`, so a variable that is already defined is
 * left alone and only `undefined` keys are filled. Optional fields are
 * skipped when they are empty; `PGPORT` and `PGSSLMODE` are always
 * offered because `Resolved` guarantees them.
 *
 * @param resolved The connection record to publish.
 */
export function applyToEnv(resolved: Resolved): void {
  const env = process.env;
  const fillIfPresent = (key: string, value: string | undefined): void => {
    if (value) env[key] ??= value;
  };

  fillIfPresent("LAKEBASE_ENDPOINT", resolved.endpoint);
  fillIfPresent("PGHOST", resolved.host);
  fillIfPresent("PGDATABASE", resolved.database);
  env.PGPORT ??= String(resolved.port);
  env.PGSSLMODE ??= resolved.sslMode;
  fillIfPresent("LAKEBASE_PROJECT", resolved.project);
  fillIfPresent("LAKEBASE_BRANCH", resolved.branch);
}
|
|
318
|
+
|
|
319
|
+
/**
 * Split a canonical endpoint resource path
 * (`projects/{p}/branches/{b}/endpoints/{e}`) into its three ids.
 *
 * @param endpoint Candidate resource path.
 * @returns The ids, or `null` when the string is not in that exact shape.
 */
export function parseEndpointName(
  endpoint: string,
): { project: string; branch: string; endpointId: string } | null {
  const match = ENDPOINT_NAME_RE.exec(endpoint);
  return match === null
    ? null
    : { project: match[1]!, branch: match[2]!, endpointId: match[3]! };
}
|
|
327
|
+
|
|
328
|
+
/**
 * Split a canonical database resource path
 * (`projects/{p}/branches/{b}/databases/{d}`) into its three ids.
 *
 * @param database Candidate resource path.
 * @returns The ids, or `null` when the string is not in that exact shape
 *   (for example a plain Postgres database name).
 */
export function parseDatabaseName(
  database: string,
): { project: string; branch: string; databaseId: string } | null {
  const match = DATABASE_NAME_RE.exec(database);
  return match === null
    ? null
    : { project: match[1]!, branch: match[2]!, databaseId: match[3]! };
}
|
|
336
|
+
|
|
337
|
+
/**
 * Return the `{b}` segment of a `projects/{p}/branches/{b}` resource
 * path, or `undefined` when the name is missing, empty, or not in that
 * shape.
 */
function branchIdFromName(name: string | undefined): string | undefined {
  return name ? BRANCH_NAME_RE.exec(name)?.[2] : undefined;
}
|
|
343
|
+
|
|
344
|
+
/**
 * Return the `{p}` segment of a `projects/{p}` resource path, or
 * `undefined` when the name is missing, empty, or not in that shape.
 */
function projectIdFromName(name: string | undefined): string | undefined {
  return name ? PROJECT_NAME_RE.exec(name)?.[1] : undefined;
}
|
|
350
|
+
|
|
351
|
+
type WorkspaceClient = ReturnType<typeof getWorkspaceClient>;
|
|
352
|
+
|
|
353
|
+
/**
 * Issue a GET through the workspace `apiClient`, asking for JSON, and
 * hand the result back typed as `T`.
 *
 * The cast is unchecked: nothing here validates the payload shape.
 * Presumably `raw: false` makes the SDK return the decoded body rather
 * than the raw response - confirm against the SDK.
 */
async function getJson<T>(ws: WorkspaceClient, path: string): Promise<T> {
  const headers = new Headers({ Accept: "application/json" });
  const body = await ws.apiClient.request({ path, method: "GET", headers, raw: false });
  return body as T;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Issue a POST through the workspace `apiClient` with a JSON payload and
 * optional query parameters, and hand the result back typed as `T`.
 *
 * The cast is unchecked: nothing here validates the payload shape.
 *
 * @param ws Workspace client whose `apiClient` performs the request.
 * @param path Request path.
 * @param body Value sent as the request payload.
 * @param query Optional query-string parameters.
 */
async function postJson<T>(
  ws: WorkspaceClient,
  path: string,
  body: unknown,
  query?: Record<string, string>,
): Promise<T> {
  const headers = new Headers({
    Accept: "application/json",
    "Content-Type": "application/json",
  });
  const result = await ws.apiClient.request({
    path,
    method: "POST",
    query,
    headers,
    raw: false,
    payload: body,
  });
  return result as T;
}
|
|
384
|
+
|
|
385
|
+
/**
 * List the workspace's Lakebase projects. Reads a single response and
 * does not follow `next_page_token`; a response without a `projects`
 * field yields an empty array.
 */
async function listProjects(ws: WorkspaceClient): Promise<Project[]> {
  const { projects } = await getJson<ListResponse>(ws, `${API_BASE}/projects`);
  return projects ?? [];
}
|
|
389
|
+
|
|
390
|
+
/**
 * List the branches of one project. Reads a single response and does
 * not follow `next_page_token`; a response without a `branches` field
 * yields an empty array.
 */
async function listBranches(ws: WorkspaceClient, project: string): Promise<Branch[]> {
  const url = `${API_BASE}/projects/${project}/branches`;
  const { branches } = await getJson<ListResponse>(ws, url);
  return branches ?? [];
}
|
|
397
|
+
|
|
398
|
+
/**
 * List the endpoints of one (project, branch). Reads a single response
 * and does not follow `next_page_token`; a response without an
 * `endpoints` field yields an empty array.
 */
async function listEndpoints(
  ws: WorkspaceClient,
  project: string,
  branch: string,
): Promise<Endpoint[]> {
  const url = `${API_BASE}/projects/${project}/branches/${branch}/endpoints`;
  const { endpoints } = await getJson<ListResponse>(ws, url);
  return endpoints ?? [];
}
|
|
409
|
+
|
|
410
|
+
/**
 * List the databases of one (project, branch). Reads a single response
 * and does not follow `next_page_token`; a response without a
 * `databases` field yields an empty array.
 */
async function listDatabases(
  ws: WorkspaceClient,
  project: string,
  branch: string,
): Promise<Database[]> {
  const url = `${API_BASE}/projects/${project}/branches/${branch}/databases`;
  const { databases } = await getJson<ListResponse>(ws, url);
  return databases ?? [];
}
|
|
421
|
+
|
|
422
|
+
/**
 * Fetch a single endpoint resource by its project, branch and endpoint
 * ids.
 */
async function getEndpoint(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  endpointId: string,
): Promise<Endpoint> {
  const url = `${API_BASE}/projects/${project}/branches/${branch}/endpoints/${endpointId}`;
  return getJson<Endpoint>(ws, url);
}
|
|
433
|
+
|
|
434
|
+
/**
 * Walk projects -> branches -> endpoints looking for the first endpoint
 * whose `status.hosts.host` equals `host` exactly, and report the ids of
 * the project and branch that own it.
 *
 * Requests are issued one at a time, and each listing reads only a
 * single response (no pagination), so this is a best-effort lookup.
 * `read_only_host` is not consulted.
 *
 * @param ws Workspace client used for the listings.
 * @param host Hostname to look for.
 * @param log Logger; receives an info line on a match and a debug line
 *   when nothing matched.
 * @returns Owning project id, branch id and the endpoint's full resource
 *   name, or `null` when no endpoint matched.
 */
async function findEndpointByHost(
  ws: WorkspaceClient,
  host: string,
  log: logUtils.Logger,
): Promise<{ project: string; branch: string; endpoint: string } | null> {
  for (const projectEntry of await listProjects(ws)) {
    const projectId = projectIdFromName(projectEntry.name);
    // Entries whose name is missing or malformed cannot be listed further.
    if (!projectId) continue;

    for (const branchEntry of await listBranches(ws, projectId)) {
      const branchId = branchIdFromName(branchEntry.name);
      if (!branchId) continue;

      const candidates = await listEndpoints(ws, projectId, branchId);
      const hit = candidates.find((candidate) => candidate.status?.hosts?.host === host);
      // A matching endpoint without a name is unusable; keep scanning.
      if (!hit?.name) continue;

      log.info("autopg: matched endpoint by host", {
        host,
        endpoint: hit.name,
      });
      return { project: projectId, branch: branchId, endpoint: hit.name };
    }
  }

  log.debug("autopg: no endpoint matched host", { host });
  return null;
}
|
|
474
|
+
|
|
475
|
+
/**
 * Pick the project to use, or create one when the listing is empty.
 *
 * Selection order:
 *  1. Zero projects AND `autoCreate === false` -> throw.
 *  2. Zero projects otherwise -> ensure a project exists whose id is
 *     `autoCreate` (when a string) or the derived default id, and
 *     return that id.
 *  3. Exactly one project -> use it. If its resource name cannot be
 *     parsed, throw an error that says so, instead of the misleading
 *     "multiple projects" message with an empty candidate list.
 *  4. Multiple projects -> throw with the candidate list (set
 *     `LAKEBASE_PROJECT` or `config.project` to disambiguate).
 *
 * @param ws Workspace client used for listing and creation.
 * @param autoCreate Explicit id to create, `false` to forbid creation,
 *   or `undefined` to derive an id.
 * @param log Logger for progress messages.
 * @returns The chosen or created project id.
 * @throws Error in the cases listed above.
 */
async function pickOrCreateProject(
  ws: WorkspaceClient,
  autoCreate: string | false | undefined,
  log: logUtils.Logger,
): Promise<string> {
  const projects = await listProjects(ws);

  if (projects.length === 0) {
    if (autoCreate === false) {
      throw new Error(
        "autopg: no Lakebase projects found and `autoCreate: false`; create a project or set LAKEBASE_PROJECT",
      );
    }
    const id = autoCreate ?? (await defaultProjectId());
    return ensureProject(ws, id, log);
  }

  const ids = projects
    .map((p) => projectIdFromName(p.name))
    .filter((id): id is string => Boolean(id));

  if (projects.length === 1) {
    const only = ids[0];
    if (only) {
      log.info("autopg: using only project", { project: only });
      return only;
    }
    throw new Error(
      `autopg: found one Lakebase project but could not parse its resource name '${projects[0]?.name ?? ""}'; set LAKEBASE_PROJECT or config.project`,
    );
  }

  throw new Error(
    `autopg: multiple projects found; set LAKEBASE_PROJECT or config.project. Candidates: ${ids.join(", ")}`,
  );
}
|
|
516
|
+
|
|
517
|
+
/**
 * Derive a Lakebase project id from the host project's name (via
 * {@link projectUtils.name}), slugified and capped at
 * `PROJECT_ID_MAX_LEN` characters.
 *
 * The result is checked against the full id constraint documented in
 * this module (`^[a-z][a-z0-9-]{0,61}[a-z0-9]$`), not just its first
 * character. Trailing hyphens - which truncation can leave behind - are
 * stripped first, since the id must end in a letter or digit.
 *
 * @returns A project id that satisfies the documented constraint.
 * @throws Error when no valid id can be derived; callers should pass an
 *   explicit `autoCreate` id in that case.
 */
async function defaultProjectId(): Promise<string> {
  const name = await projectUtils.name();
  const rawSlug = stringUtils.toSlugWithOptions({ maxLength: PROJECT_ID_MAX_LEN }, name);
  // Drop trailing hyphens so a cut-off slug like `my-app-` stays usable.
  const slug = rawSlug ? rawSlug.replace(/-+$/, "") : rawSlug;
  if (!slug || !/^[a-z][a-z0-9-]{0,61}[a-z0-9]$/.test(slug)) {
    throw new Error(
      `autopg: could not derive a Lakebase project id from project name '${name}'; pass autoCreate explicitly`,
    );
  }
  return slug;
}
|
|
536
|
+
|
|
537
|
+
/**
 * Make sure a Lakebase project with id `projectId` exists.
 *
 * Posts a create request (Postgres version `DEFAULT_PG_VERSION`), then
 * waits for the returned long-running operation to finish. Any failure
 * that `isAlreadyExistsError` recognises - from the request or from the
 * wait - is logged and treated as success, so a create that lost a race
 * does not fail the caller. Every other error is rethrown.
 *
 * @param ws Workspace client used for the create call and polling.
 * @param projectId Id to create.
 * @param log Logger for progress messages.
 * @returns `projectId`, unchanged.
 */
async function ensureProject(
  ws: WorkspaceClient,
  projectId: string,
  log: logUtils.Logger,
): Promise<string> {
  log.warn("autopg: no projects found; creating", { project: projectId });

  const createBody = { spec: { pg_version: DEFAULT_PG_VERSION } };
  const createQuery = { project_id: projectId };

  try {
    const operation = await postJson<Operation>(
      ws,
      `${API_BASE}/projects`,
      createBody,
      createQuery,
    );
    await waitForOperation(ws, operation, log);
    log.info("autopg: created project", { project: projectId });
  } catch (err) {
    if (isAlreadyExistsError(err)) {
      log.info("autopg: project already exists (race); proceeding", {
        project: projectId,
      });
    } else {
      throw err;
    }
  }

  return projectId;
}
|
|
570
|
+
|
|
571
|
+
/**
 * Decide whether a thrown value means "the resource already exists".
 *
 * Returns `true` for an object when any of these hold:
 *  - `statusCode` or `code` is the number 409;
 *  - `errorCode` matches `/already.?exists/i`;
 *  - `message` matches `/already.?exists/i`.
 *
 * Non-objects (including `null` and plain strings) are never a match.
 *
 * @param error The caught value.
 */
function isAlreadyExistsError(error: unknown): boolean {
  if (typeof error !== "object" || error === null) return false;

  const { statusCode, code, errorCode, message } = error as {
    statusCode?: number;
    code?: number;
    errorCode?: string;
    message?: string;
  };

  if (statusCode === 409 || code === 409) return true;

  const alreadyExists = /already.?exists/i;
  return [errorCode, message].some((text) => Boolean(text) && alreadyExists.test(text as string));
}
|
|
593
|
+
|
|
594
|
+
/**
 * Wait for a Lakebase long-running operation to reach `done: true` and
 * return the final operation envelope.
 *
 * An operation that is already done is returned without polling.
 * Otherwise the operation is re-fetched by name every
 * `OPERATION_POLL_MS` (sleeping before each fetch) until it is done or
 * `OPERATION_TIMEOUT_MS` has elapsed.
 *
 * @throws Error when a finished operation carries an `error`, when the
 *   operation has no `name` to poll, or when the timeout elapses first.
 */
async function waitForOperation(
  ws: WorkspaceClient,
  op: Operation,
  log: logUtils.Logger,
): Promise<Operation> {
  if (op.done) {
    if (!op.error) return op;
    throw new Error(`autopg: operation failed: ${JSON.stringify(op.error)}`);
  }

  const opName = op.name;
  if (!opName) {
    throw new Error("autopg: operation response has no name to poll");
  }

  const deadline = Date.now() + OPERATION_TIMEOUT_MS;
  while (Date.now() < deadline) {
    await sleep(OPERATION_POLL_MS);
    const latest = await getJson<Operation>(ws, `${API_BASE}/${opName}`);
    log.debug("autopg: operation status", { op: opName, done: latest.done });

    if (!latest.done) continue;
    if (latest.error) {
      throw new Error(
        `autopg: operation '${opName}' failed: ${JSON.stringify(latest.error)}`,
      );
    }
    return latest;
  }

  throw new Error(
    `autopg: operation '${opName}' did not complete within ${OPERATION_TIMEOUT_MS}ms`,
  );
}
|
|
636
|
+
|
|
637
|
+
/**
 * Poll an endpoint until it looks usable and return its latest payload.
 *
 * The endpoint is accepted as soon as either holds:
 *  - `status.current_state` is `READY` or `IDLE`;
 *  - `status.hosts.host` is published and the state is anything other
 *    than `INITIALIZING`.
 *
 * Otherwise the function sleeps `ENDPOINT_READY_POLL_MS` and retries
 * until `ENDPOINT_READY_TIMEOUT_MS` has elapsed.
 *
 * @throws Error naming the endpoint and its last observed state when the
 *   timeout elapses first.
 */
async function waitEndpointReady(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  endpointId: string,
  log: logUtils.Logger,
): Promise<Endpoint> {
  const deadline = Date.now() + ENDPOINT_READY_TIMEOUT_MS;
  let lastState: string | undefined;

  while (Date.now() < deadline) {
    const current = await getEndpoint(ws, project, branch, endpointId);
    lastState = current.status?.current_state;

    const settled = lastState === "READY" || lastState === "IDLE";
    // A published hostname outside INITIALIZING is treated as good
    // enough to hand back, even though the compute is not settled yet.
    const hostPublished = Boolean(current.status?.hosts?.host) && lastState !== "INITIALIZING";
    if (settled || hostPublished) return current;

    log.debug("autopg: waiting for endpoint", { endpointId, state: lastState });
    await sleep(ENDPOINT_READY_POLL_MS);
  }

  throw new Error(
    `autopg: endpoint '${endpointId}' under projects/${project}/branches/${branch} did not become ready within ${ENDPOINT_READY_TIMEOUT_MS}ms (last state: ${lastState ?? "unknown"})`,
  );
}
|
|
669
|
+
|
|
670
|
+
/**
 * Choose the branch to use for a project.
 *
 * Preference order: the first branch with `status.default === true`,
 * then the sole branch when the listing holds exactly one.
 *
 * @param ws Workspace client used for the listing.
 * @param project Project id whose branches are listed.
 * @param log Logger; receives the chosen branch.
 * @returns The chosen branch id.
 * @throws Error when the project has no branches, or when no branch id
 *   could be chosen (the message lists the candidate ids).
 */
async function pickBranch(
  ws: WorkspaceClient,
  project: string,
  log: logUtils.Logger,
): Promise<string> {
  const branches = await listBranches(ws, project);
  if (branches.length === 0) {
    throw new Error(
      `autopg: project '${project}' has no branches; cannot resolve a default`,
    );
  }

  const serverDefault = branches.find((b) => b.status?.default === true);
  let choice = branchIdFromName(serverDefault?.name);
  if (choice === undefined && branches.length === 1) {
    choice = branchIdFromName(branches[0]!.name);
  }

  if (choice) {
    log.info("autopg: resolved branch", { project, branch: choice });
    return choice;
  }

  const candidates = branches
    .map((b) => branchIdFromName(b.name))
    .filter((id): id is string => Boolean(id))
    .join(", ");
  throw new Error(
    `autopg: project '${project}' has multiple branches and none marked default; set LAKEBASE_BRANCH or config.branch. Candidates: ${candidates}`,
  );
}
|
|
703
|
+
|
|
704
|
+
/**
 * Choose the endpoint to use for a (project, branch).
 *
 * Preference order: the first endpoint whose `status.endpoint_type` is
 * `ENDPOINT_TYPE_READ_WRITE`, then the sole endpoint when the listing
 * holds exactly one.
 *
 * @param ws Workspace client used for the listing.
 * @param project Project id.
 * @param branch Branch id.
 * @param log Logger; receives the chosen endpoint and host.
 * @returns The endpoint's full resource name plus its
 *   `status.hosts.host`, which may be `undefined`.
 * @throws Error when the branch has no endpoints, or when no named
 *   endpoint could be chosen (the message lists the candidate names).
 */
async function pickEndpoint(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  log: logUtils.Logger,
): Promise<{ name: string; host?: string }> {
  const endpoints = await listEndpoints(ws, project, branch);
  if (endpoints.length === 0) {
    throw new Error(
      `autopg: branch 'projects/${project}/branches/${branch}' has no endpoints; cannot resolve LAKEBASE_ENDPOINT`,
    );
  }

  let primary = endpoints.find((e) => e.status?.endpoint_type === "ENDPOINT_TYPE_READ_WRITE");
  if (primary === undefined && endpoints.length === 1) {
    primary = endpoints[0];
  }

  if (!primary?.name) {
    const names = endpoints.map((e) => e.name).filter(Boolean);
    throw new Error(
      `autopg: branch has no primary READ_WRITE endpoint; set LAKEBASE_ENDPOINT or config.endpoint. Candidates: ${names.join(", ")}`,
    );
  }

  const host = primary.status?.hosts?.host;
  log.info("autopg: resolved endpoint", { endpoint: primary.name, host });
  return { name: primary.name, host };
}
|
|
736
|
+
|
|
737
|
+
/**
 * Choose the Postgres database name for a (project, branch).
 *
 * Works on `status.postgres_database` of each listed database (entries
 * without one are ignored), not on the resource ids. Preference order:
 * `databricks_postgres` when present, then the sole name when exactly
 * one is available.
 *
 * @param ws Workspace client used for the listing.
 * @param project Project id.
 * @param branch Branch id.
 * @param log Logger; receives the chosen database.
 * @returns The chosen Postgres database name.
 * @throws Error when the branch has no databases, or when no name could
 *   be chosen (the message lists the candidate names).
 */
async function pickDatabase(
  ws: WorkspaceClient,
  project: string,
  branch: string,
  log: logUtils.Logger,
): Promise<string> {
  const databases = await listDatabases(ws, project, branch);
  if (databases.length === 0) {
    throw new Error(
      `autopg: branch 'projects/${project}/branches/${branch}' has no databases; cannot resolve PGDATABASE`,
    );
  }

  const names: string[] = [];
  for (const entry of databases) {
    const pgName = entry.status?.postgres_database;
    if (pgName) names.push(pgName);
  }

  let choice = names.find((n) => n === "databricks_postgres");
  if (choice === undefined && names.length === 1) {
    choice = names[0];
  }

  if (!choice) {
    throw new Error(
      `autopg: multiple databases and no 'databricks_postgres'; set PGDATABASE or config.database. Candidates: ${names.join(", ")}`,
    );
  }

  log.info("autopg: resolved database", { database: choice });
  return choice;
}
|