@dbx-tools/appkit-autopg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/address.ts ADDED
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Flexible address parser for Lakebase Postgres connection inputs.
3
+ *
4
+ * Accepts whatever shape a user is likely to paste into
5
+ * `LAKEBASE_ENDPOINT` (or the matching config field) and extracts
6
+ * every recognizable piece. Whatever it can't recover is left for the
7
+ * REST resolver to discover.
8
+ *
9
+ * Recognized formats:
10
+ *
11
+ * - **Postgres URI** -
12
+ * `postgresql://user@host:port/db?sslmode=require` (also `postgres://`).
13
+ * Yields `user`, `host`, `port`, `database`, `sslMode`.
14
+ *
15
+ * - **Canonical endpoint resource path** -
16
+ * `projects/{p}/branches/{b}/endpoints/{e}` -
17
+ * yields `project`, `branch`, `endpointId`, and the original string as
18
+ * `endpoint` (already in lakebase's expected form).
19
+ *
20
+ * - **Database resource path** -
21
+ * `projects/{p}/branches/{b}/databases/{d}` -
22
+ * yields `project`, `branch`. The database leaf isn't surfaced because
23
+ * it's a resource id, not the Postgres database name; the resolver
24
+ * will look up the real `status.postgres_database` value via REST.
25
+ *
26
+ * - **Branch resource path** -
27
+ * `projects/{p}/branches/{b}` - yields `project`, `branch`.
28
+ *
29
+ * - **Project resource path** -
30
+ * `projects/{p}` - yields `project`.
31
+ *
32
+ * - **Bare hostname** -
33
+ * `ep-steep-forest-e199v43w.database.eastus2.azuredatabricks.net` -
34
+ * yields `host` only; the resolver reverse-looks up the owning
35
+ * endpoint to recover the resource path.
36
+ *
37
+ * - **Bare project id** -
38
+ * `dbx-tools-demo` (1-63 chars, lowercase letters/digits/hyphens) -
39
+ * yields `project`.
40
+ *
41
+ * Returns an empty object for inputs it doesn't recognize.
42
+ */
43
+
44
+ import type { SslMode } from "./resolver.js";
45
+
46
/**
 * The pieces recovered from a single Lakebase connection input.
 *
 * Every field is optional: a field is present only when the input
 * actually carried it.
 */
export interface ParsedAddress {
  /** Id of the Lakebase project. */
  project?: string;
  /** Id of the branch inside that project. */
  branch?: string;
  /** Final segment of an endpoint resource path (the endpoint's leaf id). */
  endpointId?: string;
  /** Full canonical endpoint resource path; present only for endpoint-path inputs. */
  endpoint?: string;
  /** Postgres database name (PGDATABASE), taken from the path of a URI input. */
  database?: string;
  /** Hostname of the Postgres server. */
  host?: string;
  /** TCP port of the Postgres server. */
  port?: number;
  /** Postgres user name, percent-decoded when the input had it encoded. */
  user?: string;
  /** TLS mode requested for the Postgres connection. */
  sslMode?: SslMode;
}
66
+
67
/** Leading `postgres://` or `postgresql://` scheme, matched case-insensitively. */
const URL_SCHEME_RE = /^(postgres|postgresql):\/\//i;

/** `projects/{p}/branches/{b}/endpoints/{e}`; captures project, branch, endpoint id. */
const RESOURCE_ENDPOINT_RE =
  /^projects\/([^/]+)\/branches\/([^/]+)\/endpoints\/([^/]+)$/;

/** `projects/{p}/branches/{b}/databases/{d}`; captures project, branch, database id. */
const RESOURCE_DATABASE_RE =
  /^projects\/([^/]+)\/branches\/([^/]+)\/databases\/([^/]+)$/;

/** `projects/{p}/branches/{b}`; captures project and branch. */
const RESOURCE_BRANCH_RE = /^projects\/([^/]+)\/branches\/([^/]+)$/;

/** `projects/{p}`; captures the project. */
const RESOURCE_PROJECT_RE = /^projects\/([^/]+)$/;

/**
 * Bare project id: 1-63 characters drawn from lowercase letters, digits
 * and hyphens, starting with a letter and not ending with a hyphen.
 */
const PROJECT_ID_RE = /^[a-z][a-z0-9-]{0,61}[a-z0-9]$|^[a-z]$/;

/**
 * Hostname shape: two or more dot-separated labels of letters, digits
 * and hyphens, each label starting with a letter or digit.
 */
const HOSTNAME_HINT_RE = /^[a-z0-9][a-z0-9-]*(\.[a-z0-9][a-z0-9-]*)+$/i;
76
+
77
/**
 * Classify a Lakebase connection input and extract the pieces it carries.
 *
 * The input is trimmed, then tried against each recognized shape in
 * order: Postgres URI, `projects/...` resource path, dotted hostname,
 * bare project id. The first shape that matches decides the result.
 *
 * @param input Raw user-supplied value; may be `undefined` or `null`.
 * @returns The recovered fields, or `{}` when the input is missing,
 *   blank, or matches none of the shapes.
 */
export function parseAddress(input: string | undefined | null): ParsedAddress {
  const text = input?.trim();
  if (!text) return {};

  if (URL_SCHEME_RE.test(text)) {
    return parseUri(text);
  }
  if (text.startsWith("projects/")) {
    return parseResourcePath(text);
  }
  // Resource ids never contain dots, so anything hostname-shaped is a
  // host. HOSTNAME_HINT_RE itself requires at least one dot.
  if (HOSTNAME_HINT_RE.test(text)) {
    return { host: text };
  }
  return PROJECT_ID_RE.test(text) ? { project: text } : {};
}
94
+
95
/**
 * Percent-decode one URI component, returning the text unchanged when it
 * contains a malformed escape (`decodeURIComponent` throws `URIError`
 * for those).
 */
function decodeUriPart(raw: string): string {
  try {
    return decodeURIComponent(raw);
  } catch {
    // Malformed percent-escape; keep the raw form.
    return raw;
  }
}

/**
 * Parse a `postgres://` / `postgresql://` connection URI.
 *
 * Extracts `host`, `port`, `user`, `database` (the URI path without its
 * leading slash) and `sslMode` (from the `sslmode` / `sslMode` query
 * parameter). Only pieces present in the URI are set.
 *
 * Never throws: a string the `URL` parser rejects yields `{}`, and a
 * user or database carrying a malformed percent-escape is kept in its
 * raw, undecoded form.
 *
 * @param s Trimmed input already known to start with a Postgres scheme.
 * @returns The fields recovered from the URI.
 */
function parseUri(s: string): ParsedAddress {
  let url: URL;
  try {
    url = new URL(s);
  } catch {
    return {};
  }

  const result: ParsedAddress = {};
  if (url.hostname) result.host = url.hostname;
  if (url.port) {
    const port = Number.parseInt(url.port, 10);
    if (!Number.isNaN(port)) result.port = port;
  }
  if (url.username) result.user = decodeUriPart(url.username);

  // The database path gets the same tolerant decoding as the user, so a
  // stray `%` in a pasted URI cannot make the parser throw.
  const db = url.pathname.replace(/^\//, "");
  if (db) result.database = decodeUriPart(db);

  // Postgres clients accept `sslmode`; URL params are case-sensitive but
  // we tolerate either since users paste both.
  const sslmodeRaw = url.searchParams.get("sslmode") ?? url.searchParams.get("sslMode");
  const sslmode = sslmodeRaw?.toLowerCase();
  // Any other value is ignored and `sslMode` is left unset.
  if (sslmode === "require" || sslmode === "disable" || sslmode === "prefer") {
    result.sslMode = sslmode;
  }
  return result;
}
127
+
128
/**
 * Parse a `projects/...` resource path into the ids it names.
 *
 * Recognized shapes, most specific first:
 * - `projects/{p}/branches/{b}/endpoints/{e}` gives `project`, `branch`,
 *   `endpointId`, plus the path itself as `endpoint`.
 * - `projects/{p}/branches/{b}/databases/{d}` gives `project`, `branch`.
 * - `projects/{p}/branches/{b}` gives `project`, `branch`.
 * - `projects/{p}` gives `project`.
 *
 * Trailing slashes are ignored, so a path pasted as `projects/p/` is
 * still recognized, and `endpoint` is reported without them.
 *
 * @param s Trimmed input already known to start with `projects/`.
 * @returns The recovered ids, or `{}` when the path has any other shape.
 */
function parseResourcePath(s: string): ParsedAddress {
  // The patterns are anchored at both ends, so a trailing slash would
  // otherwise make every one of them miss.
  const path = s.replace(/\/+$/, "");

  const ep = RESOURCE_ENDPOINT_RE.exec(path);
  if (ep) {
    return {
      project: ep[1],
      branch: ep[2],
      endpointId: ep[3],
      endpoint: path,
    };
  }
  // databases/{d} is a resource id (often kebab-case), not the actual
  // Postgres database name, so only project + branch are surfaced -
  // the same result a plain branch path produces.
  const scoped = RESOURCE_DATABASE_RE.exec(path) ?? RESOURCE_BRANCH_RE.exec(path);
  if (scoped) return { project: scoped[1], branch: scoped[2] };

  const pr = RESOURCE_PROJECT_RE.exec(path);
  if (pr) return { project: pr[1] };
  return {};
}
package/src/autopg.ts ADDED
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Top-level Lakebase auto-discovery helper.
3
+ *
4
+ * Call this once at process startup BEFORE `createApp(...)` so the
5
+ * `lakebase` plugin (and anyone else who reads `process.env` during
6
+ * `setup()`) sees a fully-populated environment:
7
+ *
8
+ * ```ts
9
+ * import { autopg } from "@dbx-tools/appkit-autopg";
10
+ * import { createApp, lakebase, server } from "@databricks/appkit";
11
+ *
12
+ * await autopg(); // resolves + writes process.env
13
+ * await createApp({ plugins: [lakebase(), server()] });
14
+ * ```
15
+ *
16
+ * `autopg` is intentionally NOT an AppKit plugin. AppKit's `static phase`
17
+ * field only orders plugin `setup()` invocation, not async completion -
18
+ * `lakebase.setup()` calls `parsePoolConfig` synchronously after its
19
+ * first `await` and would throw on `PGHOST` before any sibling plugin's
20
+ * REST resolution could finish. Awaiting `autopg()` upfront sidesteps
21
+ * the race entirely.
22
+ *
23
+ * Inputs flow in this priority order:
24
+ * 1. Explicit `config.<field>` argument
25
+ * 2. Matching env var (`LAKEBASE_PROJECT`, `LAKEBASE_BRANCH`,
26
+ * `LAKEBASE_ENDPOINT`, `PGHOST`, `PGDATABASE`, `PGPORT`, `PGSSLMODE`)
27
+ * 3. Derived from the Lakebase Autoscaling REST API under
28
+ * `/api/2.0/postgres/` via the Databricks workspace client
29
+ *
30
+ * Resolved values are written back to `process.env` (only filling gaps;
31
+ * existing values are preserved) so the downstream `lakebase` plugin
32
+ * picks them up. Pass `{ exportEnv: false }` to keep `process.env`
33
+ * untouched and just inspect the returned record.
34
+ */
35
+
36
+ import { logUtils } from "@dbx-tools/appkit-shared";
37
+
38
+ import {
39
+ applyToEnv,
40
+ resolveConnection,
41
+ type Resolved,
42
+ type ResolverInputs,
43
+ } from "./resolver.js";
44
+
45
/**
 * Options accepted by {@link autopg}: every resolver input, plus a
 * switch controlling whether the result is exported.
 */
export interface AutopgOptions extends ResolverInputs {
  /**
   * Whether resolved values are written to `process.env` so the
   * `lakebase` plugin sees them at startup. Defaults to `true`; pass
   * `false` to leave `process.env` untouched and only get the resolved
   * record back.
   */
  exportEnv?: boolean;
}
55
+
56
/**
 * Resolve the Lakebase Postgres connection record and, unless told
 * otherwise, export it to the environment.
 *
 * `exportEnv` is split off from `opts`; everything else is passed to
 * `resolveConnection` together with a logger named `"autopg"`. When
 * `exportEnv` is `true` (the default) the resolved record is handed to
 * `applyToEnv`. Either way a log-safe summary of the record is logged
 * at info level.
 *
 * @param opts Resolver inputs plus the optional `exportEnv` switch.
 * @returns The record produced by `resolveConnection`.
 * @throws Whatever `resolveConnection` or `applyToEnv` throws; nothing
 *   is caught here.
 */
export async function autopg(opts: AutopgOptions = {}): Promise<Resolved> {
  const { exportEnv = true, ...inputs } = opts;
  const log = logUtils.logger("autopg");
  const resolved = await resolveConnection(inputs, log);

  if (!exportEnv) {
    log.info("resolved (env untouched)", redactForLog(resolved));
    return resolved;
  }

  applyToEnv(resolved);
  log.info("env updated", redactForLog(resolved));
  return resolved;
}
81
+
82
/**
 * Build the log-safe view of a resolved record: only the project,
 * branch, endpoint, database, host, port and sslMode fields are copied;
 * anything else on the record is left out.
 */
function redactForLog(resolved: Resolved): Record<string, unknown> {
  const { project, branch, endpoint, database, host, port, sslMode } = resolved;
  return { project, branch, endpoint, database, host, port, sslMode };
}