@opengeni/db 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/chunk-57MLICFR.js +121 -0
  2. package/dist/chunk-57MLICFR.js.map +1 -0
  3. package/dist/chunk-OGCE6O2X.js +52 -0
  4. package/dist/chunk-OGCE6O2X.js.map +1 -0
  5. package/dist/chunk-PSX56ZTL.js +1093 -0
  6. package/dist/chunk-PSX56ZTL.js.map +1 -0
  7. package/dist/chunk-PZ5AY32C.js +10 -0
  8. package/dist/chunk-PZ5AY32C.js.map +1 -0
  9. package/dist/index.d.ts +8 -0
  10. package/dist/index.js +5165 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/migrate.d.ts +40 -0
  13. package/dist/migrate.js +10 -0
  14. package/dist/migrate.js.map +1 -0
  15. package/dist/provision-roles.d.ts +2063 -0
  16. package/dist/provision-roles.js +8 -0
  17. package/dist/provision-roles.js.map +1 -0
  18. package/dist/schema-CaeZQAJQ.d.ts +9705 -0
  19. package/dist/schema.d.ts +3 -0
  20. package/dist/schema.js +110 -0
  21. package/dist/schema.js.map +1 -0
  22. package/drizzle/0000_initial.sql +179 -0
  23. package/drizzle/0001_workspace_auth_billing.sql +590 -0
  24. package/drizzle/0002_packs_and_social.sql +99 -0
  25. package/drizzle/0003_capability_catalog.sql +73 -0
  26. package/drizzle/0004_workspace_environments.sql +65 -0
  27. package/drizzle/0005_session_goals.sql +45 -0
  28. package/drizzle/0006_workspace_packs.sql +31 -0
  29. package/drizzle/0007_session_history_items.sql +66 -0
  30. package/drizzle/0008_session_first_party_mcp_permissions.sql +5 -0
  31. package/drizzle/0009_goal_sessions_first_party_goals_manage.sql +34 -0
  32. package/drizzle/0010_session_parent_linkage.sql +30 -0
  33. package/drizzle/0011_context_compaction.sql +33 -0
  34. package/drizzle/0012_compaction_summary_fractional_position.sql +19 -0
  35. package/drizzle/0013_session_compact_requested.sql +16 -0
  36. package/drizzle/0014_repair_orphaned_function_call_results.sql +125 -0
  37. package/drizzle/0015_workspace_agent_instructions.sql +17 -0
  38. package/drizzle/0016_session_create_idempotency.sql +27 -0
  39. package/drizzle/0017_sandbox_leases.sql +313 -0
  40. package/drizzle/0018_sandbox_os.sql +89 -0
  41. package/drizzle/0019_session_stream_acknowledgments.sql +94 -0
  42. package/drizzle/0020_session_recordings.sql +88 -0
  43. package/drizzle/0021_sandbox_pty_sessions.sql +70 -0
  44. package/drizzle/0022_sandbox_lease_terminal_url.sql +32 -0
  45. package/drizzle/0023_session_title.sql +19 -0
  46. package/drizzle/0024_codex_subscription_credentials.sql +51 -0
  47. package/drizzle/0024_sandboxes_enrollments_metrics.sql +262 -0
  48. package/drizzle/0025_device_enrollment_requests.sql +142 -0
  49. package/drizzle/0026_device_enrollment_user_code_resolver.sql +47 -0
  50. package/drizzle/0027_session_working_dir.sql +24 -0
  51. package/drizzle/0028_codex_multi_account.sql +85 -0
  52. package/drizzle/0029_session_history_item_producer.sql +31 -0
  53. package/drizzle/0030_agent_run_state_frozen_codex.sql +35 -0
  54. package/drizzle/0031_codex_usage_cache.sql +21 -0
  55. package/drizzle/0032_codex_account_cooldown.sql +18 -0
  56. package/drizzle/0033_codex_connector_cache.sql +20 -0
  57. package/drizzle/0034_sandbox_lease_image.sql +21 -0
  58. package/drizzle/meta/_journal.json +167 -0
  59. package/package.json +66 -0
  60. package/src/codex-token-resolver.ts +247 -0
  61. package/src/environment-crypto.ts +51 -0
  62. package/src/event-payload-sanitizer.ts +89 -0
  63. package/src/index.ts +7776 -0
  64. package/src/migrate.ts +95 -0
  65. package/src/provision-roles.ts +198 -0
  66. package/src/schema.ts +1110 -0
@@ -0,0 +1,247 @@
1
+ // Shared, id-addressed, REFRESHING Codex token resolver (P2).
2
+ //
3
+ // Hoisted here from apps/worker/src/activities/codex-auth.ts so BOTH the worker
4
+ // (turn-time bearer for the streamed run) AND the api (the /wham/usage quota-bar
5
+ // reads) drive ONE resolver — no duplicated refresh/CAS/single-flight logic. The
6
+ // worker re-exports buildCodexTokenResolver from this module for back-compat, so
7
+ // the agent-turn.ts call site is unchanged.
8
+ //
9
+ // Why @opengeni/db is the right home: the resolver only orchestrates accessors
10
+ // this package already owns (loadCodexCredentialForRun / recordCodexTokenRefresh /
11
+ // setCodexCredentialStatus / encryptEnvironmentValue) plus pure @opengeni/codex
12
+ // refresh helpers and the @opengeni/config key — keeping the refresh-CAS + RLS
13
+ // invariants co-located with the rows they protect.
14
+ //
15
+ // CROSS-PROCESS SAFETY is preserved unchanged: the single-flight `inflight` map is
16
+ // process-module-scoped, so worker and api each get their own — that is CORRECT
17
+ // (each process coalesces its own concurrent refreshes). The real cross-process
18
+ // guard is the (id, version) CAS inside recordCodexTokenRefresh: if the api
19
+ // refreshes a token while a worker turn refreshes the same account, the loser's
20
+ // CAS writes 0 rows (stale version) and it re-reads the winner's token, so the
21
+ // one-time refresh token is never double-spent. RLS is untouched (every accessor
22
+ // wraps withWorkspaceRls internally).
23
+
24
+ import { environmentsEncryptionKeyBytes, type Settings } from "@opengeni/config";
25
+ import {
26
+ accessTokenExpiry,
27
+ CODEX_CLIENT_VERSION,
28
+ CODEX_REFRESH_FALLBACK_MS,
29
+ CODEX_REFRESH_WINDOW_MS,
30
+ CodexReloginRequired,
31
+ type CodexTokenSnapshot,
32
+ type CodexUsagePayload,
33
+ fetchCodexUsage,
34
+ normalizeCodexUsage,
35
+ refreshCodexToken,
36
+ } from "@opengeni/codex";
37
+ import { encryptEnvironmentValue } from "./environment-crypto";
38
+ import {
39
+ loadCodexCredentialForRun,
40
+ recordCodexAccountUsage,
41
+ recordCodexTokenRefresh,
42
+ setCodexCredentialStatus,
43
+ type CodexCredentialForRun,
44
+ type Database,
45
+ } from "./index";
46
+
47
/**
 * Refreshes currently in flight in THIS process, keyed by credential instance
 * (`<row id>:<version>`) and never by workspace id alone.
 *
 * Keying on the loaded row is what keeps disconnect -> reconnect safe: the
 * reconnected credential is a different row (new uuid), so it gets its own key
 * and cannot coalesce onto an older in-flight refresh whose rotated tokens
 * belong to the previous credential. Concurrent callers holding the SAME
 * credential instance share one promise, so the one-time refresh token is
 * spent at most once per process.
 */
const inflight = new Map<string, Promise<CodexTokenSnapshot>>();

/**
 * Collaborators used by the resolver. They are injectable so the lifecycle
 * logic (single-flight, staleness, needs_relogin transition) can be unit-tested
 * without a database; production code relies on the default bag below.
 */
export type CodexAuthDeps = {
  /** Loads the credential row the resolver is bound to. */
  loadCredential: typeof loadCodexCredentialForRun;
  /** Persists rotated tokens with a compare-and-set on (id, version). */
  recordRefresh: typeof recordCodexTokenRefresh;
  /** Updates the credential status (used to stamp `needs_relogin`). */
  setStatus: typeof setCodexCredentialStatus;
  /** Exchanges a refresh token for a new token set at the provider. */
  refresh: typeof refreshCodexToken;
  /** Encrypts the serialized token set before it is stored. */
  encrypt: typeof encryptEnvironmentValue;
  /** Resolves the encryption key bytes from settings. */
  keyBytes: typeof environmentsEncryptionKeyBytes;
};

/** Production wiring: the real db accessors, codex refresh and crypto helpers. */
const defaultDeps: CodexAuthDeps = {
  loadCredential: loadCodexCredentialForRun,
  recordRefresh: recordCodexTokenRefresh,
  setStatus: setCodexCredentialStatus,
  refresh: refreshCodexToken,
  encrypt: encryptEnvironmentValue,
  keyBytes: environmentsEncryptionKeyBytes,
};
76
+
77
/**
 * Builds a refreshing Codex token resolver bound to one workspace and one
 * credential row.
 *
 * - `getToken()` returns the stored access token, refreshing it first when it
 *   expires within `CODEX_REFRESH_WINDOW_MS`, or — when no expiry is known —
 *   when the last refresh is older than `CODEX_REFRESH_FALLBACK_MS` or was
 *   never recorded.
 * - `refresh()` always refreshes (the 401-retry path).
 *
 * Every refresh for the same credential instance (`id:version`) shares one
 * in-flight promise, and the rotated tokens are persisted with a
 * compare-and-set on that (id, version).
 *
 * @param db Database handle passed through to the accessors.
 * @param settings Settings used to load the credential and resolve the key.
 * @param workspaceId Workspace that owns the credential.
 * @param credentialId The resolved effective credential id.
 * @param deps Injectable collaborators; defaults to the production bag.
 * @returns The `getToken` / `refresh` pair described above.
 * @throws CodexReloginRequired when no credential can be loaded, when the
 *   refresh itself demands a relogin, or when the row changed mid-refresh and
 *   no active credential remains.
 * @throws Error when the environments encryption key is not configured.
 */
export function buildCodexTokenResolver(
  db: Database,
  settings: Settings,
  workspaceId: string,
  // The RESOLVED effective credential id (pin > workspace active), threaded from
  // the worker. A mid-turn switch loads a DIFFERENT row id, gets a distinct
  // single-flight key, and the (id, version) CAS in recordCodexTokenRefresh writes
  // 0 rows against the now-inactive row — so a refresh racing a switch can never
  // clobber the newly-active account.
  credentialId: string,
  deps: CodexAuthDeps = defaultDeps,
): { getToken: () => Promise<CodexTokenSnapshot>; refresh: () => Promise<CodexTokenSnapshot> } {
  // Projects a loaded credential onto the snapshot shape handed to callers.
  const snapshot = (cred: CodexCredentialForRun): CodexTokenSnapshot => ({
    accessToken: cred.tokens.accessToken,
    chatgptAccountId: cred.chatgptAccountId,
    isFedramp: cred.isFedramp,
  });

  const performRefresh = async (cred: CodexCredentialForRun): Promise<CodexTokenSnapshot> => {
    try {
      // Resolve the encryption key BEFORE spending the refresh token. If the key
      // is missing we cannot persist the rotated tokens, so calling the provider
      // first would burn the one-time refresh token and strand the credential.
      const key = deps.keyBytes(settings);
      if (!key) {
        throw new Error("OPENGENI_ENVIRONMENTS_ENCRYPTION_KEY is not configured");
      }

      const next = await deps.refresh(cred.tokens.refreshToken);
      // Any token the provider did not rotate keeps its previous value.
      const tokens = {
        access_token: next.accessToken ?? cred.tokens.accessToken,
        refresh_token: next.refreshToken ?? cred.tokens.refreshToken,
        id_token: next.idToken ?? cred.tokens.idToken,
      };

      // Compare-and-set on the loaded (id, version): if a disconnect→reconnect
      // replaced the row mid-refresh, this writes 0 rows and we must NOT clobber
      // the new credential with our now-defunct rotated tokens.
      const persisted = await deps.recordRefresh(db, {
        id: cred.id,
        version: cred.version,
        workspaceId,
        credentialEncrypted: deps.encrypt(key, JSON.stringify(tokens)),
        expiresAt: accessTokenExpiry(tokens.access_token),
        lastRefreshAt: new Date(),
      });
      if (!persisted) {
        // The row changed under us. Our rotated tokens belong to a stale family;
        // fall back to whatever is connected NOW (a reconnect leaves an active
        // row). If nothing active remains, a relogin is genuinely required.
        const current = await deps.loadCredential(db, settings, workspaceId, credentialId);
        if (current && current.status === "active") {
          return snapshot(current);
        }
        throw new CodexReloginRequired("Codex credential changed during token refresh; reconnect required.");
      }
      return { accessToken: tokens.access_token, chatgptAccountId: cred.chatgptAccountId, isFedramp: cred.isFedramp };
    } catch (error) {
      if (error instanceof CodexReloginRequired) {
        // Stamp needs_relogin ONLY if the row we refreshed is STILL current
        // (compare-and-set on the loaded id+version). A relogin triggered by the
        // OLD token family must never stamp needs_relogin onto a freshly
        // reconnected credential.
        try {
          await deps.setStatus(db, workspaceId, "needs_relogin", error.message, { id: cred.id, version: cred.version });
        } catch {
          // Best-effort: a failure to stamp the status must not replace the
          // CodexReloginRequired that callers branch on.
        }
      }
      throw error;
    }
  };

  // ALL refreshes — whether triggered by proactive staleness (getToken) or by a
  // 401 retry (refresh) — coalesce onto one in-flight promise per credential
  // instance, so concurrent calls can never double-spend the one-time refresh
  // token (which would trigger refresh_token_reused -> needs_relogin).
  const doRefresh = (cred: CodexCredentialForRun): Promise<CodexTokenSnapshot> => {
    const key = `${cred.id}:${cred.version}`;
    const existing = inflight.get(key);
    if (existing) {
      return existing;
    }
    const promise = performRefresh(cred).finally(() => {
      // Only clear the slot if it still holds this promise.
      if (inflight.get(key) === promise) {
        inflight.delete(key);
      }
    });
    inflight.set(key, promise);
    return promise;
  };

  const resolve = async (force: boolean): Promise<CodexTokenSnapshot> => {
    const cred = await deps.loadCredential(db, settings, workspaceId, credentialId);
    if (!cred) {
      throw new CodexReloginRequired("No Codex subscription is connected for this workspace.");
    }
    // Prefer the stored expiry; otherwise derive one from the access token.
    const exp = cred.expiresAt ?? accessTokenExpiry(cred.tokens.accessToken);
    const stale =
      force ||
      (exp
        ? exp.getTime() <= Date.now() + CODEX_REFRESH_WINDOW_MS
        : cred.lastRefreshAt
          ? cred.lastRefreshAt.getTime() < Date.now() - CODEX_REFRESH_FALLBACK_MS
          : true);
    return stale ? doRefresh(cred) : snapshot(cred);
  };

  return { getToken: () => resolve(false), refresh: () => resolve(true) };
}
178
+
179
/**
 * Builds the `{ status: "error" }` usage payload: no plan, no usage windows,
 * `limitReached` false, and `fetchedAt` set to the current time as an ISO
 * string. The `reason` key is present only when a reason is supplied.
 */
function errorUsagePayload(reason?: "needs_relogin"): CodexUsagePayload {
  const base: CodexUsagePayload = {
    status: "error",
    planType: null,
    fiveHour: null,
    weekly: null,
    limitReached: false,
    fetchedAt: new Date().toISOString(),
  };
  if (reason) {
    return { ...base, reason };
  }
  return base;
}
190
+
191
/**
 * Single per-account usage read shared by the api route and an (optional)
 * worker poll, so token-refresh discipline and the cache write stay in one
 * place.
 *
 * Steps:
 *   1. Obtain a bearer through the refreshing resolver for THIS account.
 *   2. Call the provider usage endpoint with that bearer.
 *   3. Normalize the response into the usage payload contract.
 *   4. When at least one usage window is present, write the usage-cache
 *      columns (best-effort).
 *
 * Failures never propagate: a `CodexReloginRequired` while resolving the token
 * yields `{ status: "error", reason: "needs_relogin" }` without contacting the
 * provider; any other failure in steps 1-2 yields a plain error payload.
 */
export async function fetchCodexUsageForAccount(
  db: Database,
  settings: Settings,
  workspaceId: string,
  credentialId: string,
): Promise<CodexUsagePayload> {
  const { getToken } = buildCodexTokenResolver(db, settings, workspaceId, credentialId);

  let bearer: CodexTokenSnapshot;
  try {
    bearer = await getToken();
  } catch (cause) {
    if (cause instanceof CodexReloginRequired) {
      return errorUsagePayload("needs_relogin");
    }
    return errorUsagePayload();
  }

  let result: CodexUsagePayload;
  try {
    const response = await fetchCodexUsage({
      accessToken: bearer.accessToken,
      chatgptAccountId: bearer.chatgptAccountId,
      isFedramp: bearer.isFedramp,
      clientVersion: CODEX_CLIENT_VERSION,
    });
    result = normalizeCodexUsage(response.status, response.payload);
  } catch {
    // A throw from the usage read is reported as an error PAYLOAD rather than
    // being allowed to escape to the caller.
    return errorUsagePayload();
  }

  const { fiveHour, weekly } = result;
  if (!fiveHour && !weekly) {
    // No usage window came back, so there is nothing to cache.
    return result;
  }

  // Best-effort cache write: a rejected write must not discard the usage that
  // was just read, so the rejection is deliberately dropped.
  await recordCodexAccountUsage(db, workspaceId, credentialId, {
    primaryUsedPercent: fiveHour?.percent ?? null,
    primaryResetAt: fiveHour?.resetAt ? new Date(fiveHour.resetAt) : null,
    secondaryUsedPercent: weekly?.percent ?? null,
    secondaryResetAt: weekly?.resetAt ? new Date(weekly.resetAt) : null,
    checkedAt: new Date(),
  }).catch(() => undefined);

  return result;
}
@@ -0,0 +1,51 @@
1
+ import { createCipheriv, createDecipheriv, randomBytes } from "node:crypto";
2
+
3
/** Format tag written as the first `:`-separated segment of a stored value. */
const VERSION_PREFIX = "v1";
/** IV length in bytes used for every encryption. */
const IV_BYTES = 12;
/** Length in bytes of the GCM authentication tag appended to the ciphertext. */
const GCM_TAG_BYTES = 16;
/** Required key length in bytes (AES-256). */
const KEY_BYTES = 32;

/**
 * Encrypts one workspace environment variable value with AES-256-GCM under an
 * operator key held outside Postgres. A fresh random IV is generated per call.
 *
 * Output format: `v1:<b64 iv>:<b64 ciphertext||tag>`.
 *
 * @param key Exactly 32 key bytes.
 * @param plaintext Value to encrypt; the empty string is allowed.
 * @returns The versioned, base64-encoded stored form.
 * @throws Error when `key` is not exactly 32 bytes.
 */
export function encryptEnvironmentValue(key: Uint8Array, plaintext: string): string {
  assertKey(key);
  const iv = randomBytes(IV_BYTES);
  const cipher = createCipheriv("aes-256-gcm", key, iv);
  // The auth tag is appended directly after the ciphertext in one segment.
  const ciphertext = Buffer.concat([cipher.update(plaintext, "utf8"), cipher.final(), cipher.getAuthTag()]);
  return `${VERSION_PREFIX}:${iv.toString("base64")}:${ciphertext.toString("base64")}`;
}

/**
 * Decrypts a stored `v1:` value. Error messages never echo plaintext or
 * ciphertext: unknown versions and malformed segments throw
 * "unsupported environment value format"; auth-tag mismatches throw
 * "environment value decryption failed".
 *
 * @param key Exactly 32 key bytes.
 * @param stored A value produced by `encryptEnvironmentValue`.
 * @returns The decrypted UTF-8 plaintext (possibly the empty string).
 * @throws Error on a wrong-sized key, an unsupported format, or a failed
 *   authentication check.
 */
export function decryptEnvironmentValue(key: Uint8Array, stored: string): string {
  assertKey(key);
  const parts = stored.split(":");
  if (parts.length !== 3 || parts[0] !== VERSION_PREFIX) {
    throw new Error("unsupported environment value format");
  }
  const iv = Buffer.from(parts[1]!, "base64");
  const payload = Buffer.from(parts[2]!, "base64");
  // An encrypted empty string is exactly one auth tag with zero ciphertext
  // bytes, so a payload of exactly GCM_TAG_BYTES is valid; only a payload too
  // short to hold a tag is malformed.
  if (iv.length !== IV_BYTES || payload.length < GCM_TAG_BYTES) {
    throw new Error("unsupported environment value format");
  }
  const tagOffset = payload.length - GCM_TAG_BYTES;
  const ciphertext = payload.subarray(0, tagOffset);
  const tag = payload.subarray(tagOffset);
  const decipher = createDecipheriv("aes-256-gcm", key, iv);
  decipher.setAuthTag(tag);
  try {
    return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString("utf8");
  } catch {
    // Deliberately generic so no key, plaintext or ciphertext detail leaks.
    throw new Error("environment value decryption failed");
  }
}

/** Throws unless `key` is exactly `KEY_BYTES` long. */
function assertKey(key: Uint8Array): void {
  if (key.length !== KEY_BYTES) {
    throw new Error("environment encryption key must be exactly 32 bytes");
  }
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Last line of defense against a session event crashing a whole turn.
3
+ *
4
+ * Postgres `text`/`jsonb` cannot store a NUL byte (U+0000) nor lone UTF-16
5
+ * surrogates. Raw exec output routinely carries both -- chrome/crashpad logs,
6
+ * `cat` of a binary, random bytes -- and the worker persists that output verbatim
7
+ * inside `agent.toolCall.output` / `sandbox.command.output` event payloads. When
8
+ * such a payload reaches `INSERT INTO session_events`, the driver rejects it
9
+ * ("Failed query: insert into session_events") and the turn dies.
10
+ *
11
+ * `sanitizeEventPayload` deep-walks any payload value (objects, arrays, nested)
12
+ * and, for every string, strips NUL and rewrites invalid/lone UTF-16 surrogates
13
+ * to the Unicode replacement char (U+FFFD), so the result is always valid UTF-8
14
+ * that jsonb can store. It is cheap and total: only strings are touched, and only
15
+ * the two disallowed classes of code unit -- no meaningful text is lost, no
16
+ * truncation (truncation is handled elsewhere).
17
+ */
18
+
19
/**
 * Unicode replacement character (U+FFFD). Written as an escape so the value
 * does not depend on how this source file is encoded or rendered.
 */
const REPLACEMENT = "\uFFFD";

/**
 * Strip NUL and repair invalid/lone UTF-16 surrogates in a single string.
 *
 * - U+0000 is dropped.
 * - A high surrogate not immediately followed by a low surrogate, and a low
 *   surrogate not consumed as part of a pair, each become U+FFFD.
 * - Valid surrogate pairs and every other code unit are kept as-is.
 *
 * A string with nothing to repair is returned unchanged.
 *
 * @param value String to sanitize.
 * @returns The sanitized string.
 */
export function sanitizeEventString(value: string): string {
  const length = value.length;
  let rebuilt = "";
  let cleanFrom = 0; // start of the current run of code units copied verbatim
  let changed = false;
  let i = 0;
  while (i < length) {
    const code = value.charCodeAt(i);
    if (code !== 0x0000 && (code < 0xd800 || code > 0xdfff)) {
      i += 1;
      continue;
    }
    if (code >= 0xd800 && code <= 0xdbff && i + 1 < length) {
      const next = value.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Valid pair: both code units stay inside the current clean run.
        i += 2;
        continue;
      }
    }
    // NUL or lone surrogate: flush the clean run in one slice instead of
    // appending code unit by code unit, then drop (NUL) or replace (surrogate).
    rebuilt += value.slice(cleanFrom, i);
    if (code !== 0x0000) {
      rebuilt += REPLACEMENT;
    }
    changed = true;
    i += 1;
    cleanFrom = i;
  }
  return changed ? rebuilt + value.slice(cleanFrom) : value;
}

/**
 * Deep-walk a session event payload and sanitize every string in it.
 *
 * - Strings are passed through `sanitizeEventString`.
 * - Arrays are mapped element by element.
 * - `Date` values are returned untouched. Walking a Date's (empty) enumerable
 *   entries would turn it into `{}` and lose the timestamp.
 * - Any other object is rebuilt from its own enumerable entries, with both
 *   keys and values sanitized.
 * - Every other value (number, boolean, null, undefined, ...) passes through.
 *
 * @param payload Any payload value.
 * @returns A sanitized copy for strings, arrays and walked objects; the input
 *   itself for every other value.
 */
export function sanitizeEventPayload<T>(payload: T): T {
  if (typeof payload === "string") {
    return sanitizeEventString(payload) as unknown as T;
  }
  if (Array.isArray(payload)) {
    return payload.map((item) => sanitizeEventPayload(item)) as unknown as T;
  }
  if (payload instanceof Date) {
    return payload;
  }
  if (payload && typeof payload === "object") {
    const entries = Object.entries(payload as Record<string, unknown>).map(
      ([key, value]) => [sanitizeEventString(key), sanitizeEventPayload(value)] as const,
    );
    return Object.fromEntries(entries) as unknown as T;
  }
  return payload;
}