@opengeni/db 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-57MLICFR.js +121 -0
- package/dist/chunk-57MLICFR.js.map +1 -0
- package/dist/chunk-OGCE6O2X.js +52 -0
- package/dist/chunk-OGCE6O2X.js.map +1 -0
- package/dist/chunk-PSX56ZTL.js +1093 -0
- package/dist/chunk-PSX56ZTL.js.map +1 -0
- package/dist/chunk-PZ5AY32C.js +10 -0
- package/dist/chunk-PZ5AY32C.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +5165 -0
- package/dist/index.js.map +1 -0
- package/dist/migrate.d.ts +40 -0
- package/dist/migrate.js +10 -0
- package/dist/migrate.js.map +1 -0
- package/dist/provision-roles.d.ts +2063 -0
- package/dist/provision-roles.js +8 -0
- package/dist/provision-roles.js.map +1 -0
- package/dist/schema-CaeZQAJQ.d.ts +9705 -0
- package/dist/schema.d.ts +3 -0
- package/dist/schema.js +110 -0
- package/dist/schema.js.map +1 -0
- package/drizzle/0000_initial.sql +179 -0
- package/drizzle/0001_workspace_auth_billing.sql +590 -0
- package/drizzle/0002_packs_and_social.sql +99 -0
- package/drizzle/0003_capability_catalog.sql +73 -0
- package/drizzle/0004_workspace_environments.sql +65 -0
- package/drizzle/0005_session_goals.sql +45 -0
- package/drizzle/0006_workspace_packs.sql +31 -0
- package/drizzle/0007_session_history_items.sql +66 -0
- package/drizzle/0008_session_first_party_mcp_permissions.sql +5 -0
- package/drizzle/0009_goal_sessions_first_party_goals_manage.sql +34 -0
- package/drizzle/0010_session_parent_linkage.sql +30 -0
- package/drizzle/0011_context_compaction.sql +33 -0
- package/drizzle/0012_compaction_summary_fractional_position.sql +19 -0
- package/drizzle/0013_session_compact_requested.sql +16 -0
- package/drizzle/0014_repair_orphaned_function_call_results.sql +125 -0
- package/drizzle/0015_workspace_agent_instructions.sql +17 -0
- package/drizzle/0016_session_create_idempotency.sql +27 -0
- package/drizzle/0017_sandbox_leases.sql +313 -0
- package/drizzle/0018_sandbox_os.sql +89 -0
- package/drizzle/0019_session_stream_acknowledgments.sql +94 -0
- package/drizzle/0020_session_recordings.sql +88 -0
- package/drizzle/0021_sandbox_pty_sessions.sql +70 -0
- package/drizzle/0022_sandbox_lease_terminal_url.sql +32 -0
- package/drizzle/0023_session_title.sql +19 -0
- package/drizzle/0024_codex_subscription_credentials.sql +51 -0
- package/drizzle/0024_sandboxes_enrollments_metrics.sql +262 -0
- package/drizzle/0025_device_enrollment_requests.sql +142 -0
- package/drizzle/0026_device_enrollment_user_code_resolver.sql +47 -0
- package/drizzle/0027_session_working_dir.sql +24 -0
- package/drizzle/0028_codex_multi_account.sql +85 -0
- package/drizzle/0029_session_history_item_producer.sql +31 -0
- package/drizzle/0030_agent_run_state_frozen_codex.sql +35 -0
- package/drizzle/0031_codex_usage_cache.sql +21 -0
- package/drizzle/0032_codex_account_cooldown.sql +18 -0
- package/drizzle/0033_codex_connector_cache.sql +20 -0
- package/drizzle/0034_sandbox_lease_image.sql +21 -0
- package/drizzle/meta/_journal.json +167 -0
- package/package.json +66 -0
- package/src/codex-token-resolver.ts +247 -0
- package/src/environment-crypto.ts +51 -0
- package/src/event-payload-sanitizer.ts +89 -0
- package/src/index.ts +7776 -0
- package/src/migrate.ts +95 -0
- package/src/provision-roles.ts +198 -0
- package/src/schema.ts +1110 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
// Shared, id-addressed, REFRESHING Codex token resolver (P2).
|
|
2
|
+
//
|
|
3
|
+
// Hoisted here from apps/worker/src/activities/codex-auth.ts so BOTH the worker
|
|
4
|
+
// (turn-time bearer for the streamed run) AND the api (the /wham/usage quota-bar
|
|
5
|
+
// reads) drive ONE resolver — no duplicated refresh/CAS/single-flight logic. The
|
|
6
|
+
// worker re-exports buildCodexTokenResolver from this module for back-compat, so
|
|
7
|
+
// the agent-turn.ts call site is unchanged.
|
|
8
|
+
//
|
|
9
|
+
// Why @opengeni/db is the right home: the resolver only orchestrates accessors
|
|
10
|
+
// this package already owns (loadCodexCredentialForRun / recordCodexTokenRefresh /
|
|
11
|
+
// setCodexCredentialStatus / encryptEnvironmentValue) plus pure @opengeni/codex
|
|
12
|
+
// refresh helpers and the @opengeni/config key — keeping the refresh-CAS + RLS
|
|
13
|
+
// invariants co-located with the rows they protect.
|
|
14
|
+
//
|
|
15
|
+
// CROSS-PROCESS SAFETY is preserved unchanged: the single-flight `inflight` map is
|
|
16
|
+
// process-module-scoped, so worker and api each get their own — that is CORRECT
|
|
17
|
+
// (each process coalesces its own concurrent refreshes). The real cross-process
|
|
18
|
+
// guard is the (id, version) CAS inside recordCodexTokenRefresh: if the api
|
|
19
|
+
// refreshes a token while a worker turn refreshes the same account, the loser's
|
|
20
|
+
// CAS writes 0 rows (stale version) and it re-reads the winner's token, so the
|
|
21
|
+
// one-time refresh token is never double-spent. RLS is untouched (every accessor
|
|
22
|
+
// wraps withWorkspaceRls internally).
|
|
23
|
+
|
|
24
|
+
import { environmentsEncryptionKeyBytes, type Settings } from "@opengeni/config";
|
|
25
|
+
import {
|
|
26
|
+
accessTokenExpiry,
|
|
27
|
+
CODEX_CLIENT_VERSION,
|
|
28
|
+
CODEX_REFRESH_FALLBACK_MS,
|
|
29
|
+
CODEX_REFRESH_WINDOW_MS,
|
|
30
|
+
CodexReloginRequired,
|
|
31
|
+
type CodexTokenSnapshot,
|
|
32
|
+
type CodexUsagePayload,
|
|
33
|
+
fetchCodexUsage,
|
|
34
|
+
normalizeCodexUsage,
|
|
35
|
+
refreshCodexToken,
|
|
36
|
+
} from "@opengeni/codex";
|
|
37
|
+
import { encryptEnvironmentValue } from "./environment-crypto";
|
|
38
|
+
import {
|
|
39
|
+
loadCodexCredentialForRun,
|
|
40
|
+
recordCodexAccountUsage,
|
|
41
|
+
recordCodexTokenRefresh,
|
|
42
|
+
setCodexCredentialStatus,
|
|
43
|
+
type CodexCredentialForRun,
|
|
44
|
+
type Database,
|
|
45
|
+
} from "./index";
|
|
46
|
+
|
|
47
|
+
// Single-flight registry of in-progress refreshes, one entry per CREDENTIAL
// INSTANCE (row id + version), held at process-module scope.
//
// Keying by the loaded credential's id+version — NOT by workspaceId alone (P1-b) —
// is what makes a disconnect→reconnect safe: a post-reconnect getToken loads a
// DIFFERENT row (new uuid id) and so gets a distinct key, instead of coalescing
// onto the OLD in-flight refresh and writing stale rotated tokens over the freshly
// connected credential. Concurrent calls for the SAME credential still coalesce,
// so the one-time refresh token is never double-spent.
//
// Entries are added and removed by `doRefresh` inside buildCodexTokenResolver.
const inflight = new Map<string, Promise<CodexTokenSnapshot>>();
|
|
55
|
+
|
|
56
|
+
// Dependencies are injectable so the lifecycle logic (single-flight, staleness,
// needs_relogin transition) is unit-testable without a database. Production uses
// the real db + codex functions via the default bag.
//
// Each member has exactly the signature of the production function it stands in
// for, so a test double must be call-compatible with the real accessor.
export type CodexAuthDeps = {
  // Loads the credential row the resolver operates on.
  loadCredential: typeof loadCodexCredentialForRun;
  // Persists rotated tokens; the resolver treats a falsy result as "row changed".
  recordRefresh: typeof recordCodexTokenRefresh;
  // Stamps a credential status (the resolver only ever writes "needs_relogin").
  setStatus: typeof setCodexCredentialStatus;
  // Exchanges the stored refresh token for new tokens.
  refresh: typeof refreshCodexToken;
  // Encrypts the serialized token bundle before it is persisted.
  encrypt: typeof encryptEnvironmentValue;
  // Derives the encryption key from settings; a falsy result aborts the refresh.
  keyBytes: typeof environmentsEncryptionKeyBytes;
};
|
|
67
|
+
|
|
68
|
+
// Production dependency bag: every slot is wired to the real db accessor,
// codex refresh helper, or config key function imported at the top of this
// module. Used whenever buildCodexTokenResolver is called without `deps`.
const defaultDeps: CodexAuthDeps = {
  loadCredential: loadCodexCredentialForRun,
  recordRefresh: recordCodexTokenRefresh,
  setStatus: setCodexCredentialStatus,
  refresh: refreshCodexToken,
  encrypt: encryptEnvironmentValue,
  keyBytes: environmentsEncryptionKeyBytes,
};
|
|
76
|
+
|
|
77
|
+
/**
 * Builds an id-addressed, refreshing Codex token resolver for one workspace
 * credential.
 *
 * `credentialId` is the RESOLVED effective credential id (pin > workspace
 * active), threaded from the worker. A mid-turn switch loads a DIFFERENT row id,
 * gets a distinct single-flight key, and the (id, version) CAS in
 * recordCodexTokenRefresh writes 0 rows against the now-inactive row — so a
 * refresh racing a switch can never clobber the newly-active account. The
 * single-flight map needs zero change.
 *
 * @param db - Database handle handed to every accessor in `deps`.
 * @param settings - Settings passed to `deps.loadCredential` and `deps.keyBytes`.
 * @param workspaceId - Workspace the credential belongs to.
 * @param credentialId - Resolved effective credential id (see above).
 * @param deps - Injectable accessors; defaults to the real db + codex functions.
 * @returns `getToken`, which refreshes only when the loaded credential is stale,
 *   and `refresh`, which always refreshes (the 401-retry path). Both reject with
 *   `CodexReloginRequired` when no credential row loads, or when the row changed
 *   during a refresh and no active credential remains; any other refresh error
 *   is rethrown unchanged.
 */
export function buildCodexTokenResolver(
  db: Database,
  settings: Settings,
  workspaceId: string,
  credentialId: string,
  deps: CodexAuthDeps = defaultDeps,
): { getToken: () => Promise<CodexTokenSnapshot>; refresh: () => Promise<CodexTokenSnapshot> } {
  // Projects a loaded credential row onto the bearer snapshot callers consume.
  const snapshot = (cred: CodexCredentialForRun): CodexTokenSnapshot => ({
    accessToken: cred.tokens.accessToken,
    chatgptAccountId: cred.chatgptAccountId,
    isFedramp: cred.isFedramp,
  });

  // Spends the refresh token, persists the rotated tokens via CAS, and returns
  // the bearer that is valid afterwards.
  const performRefresh = async (cred: CodexCredentialForRun): Promise<CodexTokenSnapshot> => {
    try {
      const next = await deps.refresh(cred.tokens.refreshToken);
      // Any token the refresh response leaves out keeps its previously stored value.
      const tokens = {
        access_token: next.accessToken ?? cred.tokens.accessToken,
        refresh_token: next.refreshToken ?? cred.tokens.refreshToken,
        id_token: next.idToken ?? cred.tokens.idToken,
      };
      const key = deps.keyBytes(settings);
      if (!key) {
        throw new Error("OPENGENI_ENVIRONMENTS_ENCRYPTION_KEY is not configured");
      }
      // Compare-and-set on the loaded (id, version): if a disconnect→reconnect
      // replaced the row mid-refresh, this writes 0 rows and we must NOT clobber
      // the new credential with our now-defunct rotated tokens.
      const persisted = await deps.recordRefresh(db, {
        id: cred.id,
        version: cred.version,
        workspaceId,
        credentialEncrypted: deps.encrypt(key, JSON.stringify(tokens)),
        expiresAt: accessTokenExpiry(tokens.access_token),
        lastRefreshAt: new Date(),
      });
      if (!persisted) {
        // The row changed under us. Our rotated tokens belong to a stale family;
        // fall back to whatever is connected NOW (a reconnect leaves an active
        // row). If nothing active remains, a relogin is genuinely required.
        const current = await deps.loadCredential(db, settings, workspaceId, credentialId);
        if (current && current.status === "active") {
          return snapshot(current);
        }
        throw new CodexReloginRequired("Codex credential changed during token refresh; reconnect required.");
      }
      return { accessToken: tokens.access_token, chatgptAccountId: cred.chatgptAccountId, isFedramp: cred.isFedramp };
    } catch (error) {
      if (error instanceof CodexReloginRequired) {
        // Stamp needs_relogin ONLY if the row we refreshed is STILL current
        // (compare-and-set on the loaded id+version). A relogin triggered by the
        // OLD token family must never stamp needs_relogin onto a freshly
        // reconnected credential.
        try {
          await deps.setStatus(db, workspaceId, "needs_relogin", error.message, { id: cred.id, version: cred.version });
        } catch {
          // The stamp is best-effort. If it fails, the CodexReloginRequired below
          // must still be what the caller sees: callers branch on
          // `instanceof CodexReloginRequired`, and letting the stamp's own failure
          // escape here would replace it with an unrelated error.
        }
      }
      throw error;
    }
  };

  // ALL refreshes — whether triggered by proactive staleness (getToken) or by a
  // 401 retry (refresh) — coalesce onto one in-flight promise per credential
  // instance, so concurrent calls can never double-spend the one-time refresh
  // token (which would trigger refresh_token_reused -> needs_relogin).
  const doRefresh = (cred: CodexCredentialForRun): Promise<CodexTokenSnapshot> => {
    const key = `${cred.id}:${cred.version}`;
    const existing = inflight.get(key);
    if (existing) {
      return existing;
    }
    const promise = performRefresh(cred).finally(() => {
      // Only clear the slot if it still holds THIS refresh.
      if (inflight.get(key) === promise) {
        inflight.delete(key);
      }
    });
    inflight.set(key, promise);
    return promise;
  };

  // Loads the credential and decides between returning the stored bearer and
  // refreshing. `force` skips the staleness decision and always refreshes.
  const resolve = async (force: boolean): Promise<CodexTokenSnapshot> => {
    const cred = await deps.loadCredential(db, settings, workspaceId, credentialId);
    if (!cred) {
      throw new CodexReloginRequired("No Codex subscription is connected for this workspace.");
    }
    // Prefer the stored expiry; otherwise derive one from the access token.
    const exp = cred.expiresAt ?? accessTokenExpiry(cred.tokens.accessToken);
    // Staleness, in priority order:
    //   1. a known expiry that falls inside the refresh window;
    //   2. no expiry, but the last refresh is older than the fallback age;
    //   3. neither is known -> treat as stale.
    const stale =
      force ||
      (exp
        ? exp.getTime() <= Date.now() + CODEX_REFRESH_WINDOW_MS
        : cred.lastRefreshAt
          ? cred.lastRefreshAt.getTime() < Date.now() - CODEX_REFRESH_FALLBACK_MS
          : true);
    return stale ? doRefresh(cred) : snapshot(cred);
  };

  return { getToken: () => resolve(false), refresh: () => resolve(true) };
}
|
|
178
|
+
|
|
179
|
+
/**
 * Builds the `status: "error"` usage payload: no plan, no usage windows,
 * `limitReached` false, stamped with the current time.
 *
 * @param reason - When given, it is attached as the payload's `reason`; when
 *   omitted, the payload has no `reason` key at all.
 */
function errorUsagePayload(reason?: "needs_relogin"): CodexUsagePayload {
  const payload: CodexUsagePayload = {
    status: "error",
    planType: null,
    fiveHour: null,
    weekly: null,
    limitReached: false,
    fetchedAt: new Date().toISOString(),
  };
  if (!reason) {
    return payload;
  }
  return { ...payload, reason };
}
|
|
190
|
+
|
|
191
|
+
/**
 * The one per-account usage read, shared by the api route and an (optional)
 * worker poll so that refresh discipline and cache-writes stay in lockstep.
 *
 * Steps:
 *   1. Obtain a REFRESHING bearer for this account through
 *      buildCodexTokenResolver (proactive staleness refresh, single-flight,
 *      (id,version) CAS-persist), so an idle account's expired JWT does not
 *      401 the usage read.
 *   2. GET /wham/usage with that bearer.
 *   3. Normalize (§3) the response into the P2/P3 contract.
 *   4. If either usage window is present, write the five usage-cache columns
 *      (the TTL clock).
 *
 * This function reports failures as payloads rather than rejecting: a
 * `CodexReloginRequired` from the resolver yields
 * `{ status: "error", reason: "needs_relogin" }` without contacting the
 * provider, and any other resolver or fetch failure yields a plain error
 * payload.
 */
export async function fetchCodexUsageForAccount(
  db: Database,
  settings: Settings,
  workspaceId: string,
  credentialId: string,
): Promise<CodexUsagePayload> {
  const { getToken } = buildCodexTokenResolver(db, settings, workspaceId, credentialId);

  let bearer: CodexTokenSnapshot;
  try {
    bearer = await getToken();
  } catch (error) {
    const reloginNeeded = error instanceof CodexReloginRequired;
    return errorUsagePayload(reloginNeeded ? "needs_relogin" : undefined);
  }

  let result: CodexUsagePayload;
  try {
    const { status, payload } = await fetchCodexUsage({
      accessToken: bearer.accessToken,
      chatgptAccountId: bearer.chatgptAccountId,
      isFedramp: bearer.isFedramp,
      clientVersion: CODEX_CLIENT_VERSION,
    });
    result = normalizeCodexUsage(status, payload);
  } catch {
    // A throw while reading /wham/usage must come back as an error PAYLOAD
    // ({status:"error"} at 200), never as an unhandled 500 from the route.
    return errorUsagePayload();
  }

  const { fiveHour, weekly } = result;
  if (!fiveHour && !weekly) {
    // No usage windows -> nothing to cache.
    return result;
  }

  // Best-effort cache-write: neither a disconnect under us (false) nor a
  // transient write error may sink the usage we just read.
  await recordCodexAccountUsage(db, workspaceId, credentialId, {
    primaryUsedPercent: fiveHour?.percent ?? null,
    primaryResetAt: fiveHour?.resetAt ? new Date(fiveHour.resetAt) : null,
    secondaryUsedPercent: weekly?.percent ?? null,
    secondaryResetAt: weekly?.resetAt ? new Date(weekly.resetAt) : null,
    checkedAt: new Date(),
  }).catch(() => undefined);

  return result;
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { createCipheriv, createDecipheriv, randomBytes } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
// Leading segment of every stored value; decrypt rejects anything else.
const VERSION_PREFIX = "v1";
// Length of the random IV generated per encryption; decrypt requires exactly this many bytes.
const IV_BYTES = 12;
// Length of the GCM auth tag that decrypt splits off the end of the payload.
const GCM_TAG_BYTES = 16;
// Required key length (AES-256), enforced by assertKey.
const KEY_BYTES = 32;
|
|
7
|
+
|
|
8
|
+
/**
 * Seals a single workspace environment variable value using AES-256-GCM with
 * an operator key that lives outside Postgres.
 *
 * A fresh random IV is generated on every call. The result has the shape
 * `v1:<b64 iv>:<b64 ciphertext||tag>`.
 *
 * @param key - Encryption key; must be exactly 32 bytes.
 * @param plaintext - Value to encrypt, encoded as UTF-8.
 * @returns The versioned, base64-encoded stored form.
 * @throws Error when `key` is not exactly 32 bytes.
 */
export function encryptEnvironmentValue(key: Uint8Array, plaintext: string): string {
  assertKey(key);
  const nonce = randomBytes(IV_BYTES);
  const gcm = createCipheriv("aes-256-gcm", key, nonce);
  const head = gcm.update(plaintext, "utf8");
  const tail = gcm.final();
  // The auth tag is only available after final(); it is appended to the ciphertext.
  const authTag = gcm.getAuthTag();
  const sealed = Buffer.concat([head, tail, authTag]);
  return [VERSION_PREFIX, nonce.toString("base64"), sealed.toString("base64")].join(":");
}
|
|
19
|
+
|
|
20
|
+
/**
 * Decrypts a stored `v1:<b64 iv>:<b64 ciphertext||tag>` value.
 *
 * Error messages never echo plaintext or ciphertext: unknown versions and
 * malformed layouts throw "unsupported environment value format"; auth-tag
 * mismatches throw "environment value decryption failed".
 *
 * An empty plaintext is a valid stored value: its payload is the 16-byte auth
 * tag with zero ciphertext bytes, so only payloads SHORTER than the tag are
 * rejected as malformed.
 *
 * @param key - Encryption key; must be exactly 32 bytes.
 * @param stored - Value previously produced by encryptEnvironmentValue.
 * @returns The decrypted plaintext decoded as UTF-8.
 * @throws Error when the key has the wrong length, the stored value is not a
 *   well-formed `v1:` value, or authentication fails.
 */
export function decryptEnvironmentValue(key: Uint8Array, stored: string): string {
  assertKey(key);
  const parts = stored.split(":");
  if (parts.length !== 3 || parts[0] !== VERSION_PREFIX) {
    throw new Error("unsupported environment value format");
  }
  const iv = Buffer.from(parts[1]!, "base64");
  const payload = Buffer.from(parts[2]!, "base64");
  // `<`, not `<=`: a payload of exactly GCM_TAG_BYTES is what encrypting "" yields.
  if (iv.length !== IV_BYTES || payload.length < GCM_TAG_BYTES) {
    throw new Error("unsupported environment value format");
  }
  // The payload is ciphertext followed by the auth tag; split at the tag boundary.
  const tagOffset = payload.length - GCM_TAG_BYTES;
  const tag = payload.subarray(tagOffset);
  const ciphertext = payload.subarray(0, tagOffset);
  const decipher = createDecipheriv("aes-256-gcm", key, iv);
  decipher.setAuthTag(tag);
  try {
    // final() throws when the auth tag does not verify.
    return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString("utf8");
  } catch {
    throw new Error("environment value decryption failed");
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Guards both encrypt and decrypt: rejects any key whose length is not
 * KEY_BYTES.
 *
 * @throws Error when `key.length` differs from KEY_BYTES.
 */
function assertKey(key: Uint8Array): void {
  const hasExpectedLength = key.length === KEY_BYTES;
  if (hasExpectedLength) {
    return;
  }
  throw new Error("environment encryption key must be exactly 32 bytes");
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Last line of defense against a session event crashing a whole turn.
|
|
3
|
+
*
|
|
4
|
+
* Postgres `text`/`jsonb` cannot store a NUL byte (U+0000) nor lone UTF-16
|
|
5
|
+
* surrogates. Raw exec output routinely carries both -- chrome/crashpad logs,
|
|
6
|
+
* `cat` of a binary, random bytes -- and the worker persists that output verbatim
|
|
7
|
+
* inside `agent.toolCall.output` / `sandbox.command.output` event payloads. When
|
|
8
|
+
* such a payload reaches `INSERT INTO session_events`, the driver rejects it
|
|
9
|
+
* ("Failed query: insert into session_events") and the turn dies.
|
|
10
|
+
*
|
|
11
|
+
* `sanitizeEventPayload` deep-walks any payload value (objects, arrays, nested)
|
|
12
|
+
* and, for every string, strips NUL and rewrites invalid/lone UTF-16 surrogates
|
|
13
|
+
* to the Unicode replacement char (U+FFFD), so the result is always valid UTF-8
|
|
14
|
+
* that jsonb can store. It is cheap and total: only strings are touched, and only
|
|
15
|
+
* the two disallowed classes of code unit -- no meaningful text is lost, no
|
|
16
|
+
* truncation (truncation is handled elsewhere).
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
// Substituted for every invalid/lone surrogate code unit.
const REPLACEMENT = "�";

/**
 * Removes NUL (U+0000) and replaces every invalid or lone UTF-16 surrogate
 * with REPLACEMENT, scanning the string one code unit at a time.
 *
 * A high surrogate (U+D800..U+DBFF) is kept only when the very next code unit
 * is a low surrogate (U+DC00..U+DFFF); the pair is then copied through intact.
 * NULs are dropped during the same scan, so a NUL sitting between two
 * surrogate halves does NOT let them pair up.
 *
 * A string with nothing to fix is returned as-is without building a copy.
 *
 * @param value - String to sanitize.
 * @returns The sanitized string.
 */
export function sanitizeEventString(value: string): string {
  const length = value.length;

  // Find the first code unit that needs attention; clean strings exit here.
  let firstOffender = -1;
  for (let scan = 0; scan < length; scan++) {
    const unit = value.charCodeAt(scan);
    const isNul = unit === 0x0000;
    const isSurrogate = unit >= 0xd800 && unit <= 0xdfff;
    if (isNul || isSurrogate) {
      firstOffender = scan;
      break;
    }
  }
  if (firstOffender === -1) {
    return value;
  }

  // Rebuild from slices: untouched runs are copied wholesale, and only the
  // offending code units are dropped or replaced.
  const pieces: string[] = [];
  let runStart = 0;
  const flushRun = (end: number): void => {
    if (end > runStart) {
      pieces.push(value.slice(runStart, end));
    }
  };

  // Everything before firstOffender is known clean, so it stays in the first run.
  let pos = firstOffender;
  while (pos < length) {
    const unit = value.charCodeAt(pos);

    if (unit === 0x0000) {
      // Drop NUL: close the run before it and restart after it.
      flushRun(pos);
      pos += 1;
      runStart = pos;
      continue;
    }

    if (unit >= 0xd800 && unit <= 0xdbff) {
      const follower = pos + 1 < length ? value.charCodeAt(pos + 1) : 0;
      if (follower >= 0xdc00 && follower <= 0xdfff) {
        // Well-formed pair: both halves remain part of the current run.
        pos += 2;
        continue;
      }
      // High surrogate with no low surrogate right behind it.
      flushRun(pos);
      pieces.push(REPLACEMENT);
      pos += 1;
      runStart = pos;
      continue;
    }

    if (unit >= 0xdc00 && unit <= 0xdfff) {
      // Low surrogate that was not consumed as the second half of a pair.
      flushRun(pos);
      pieces.push(REPLACEMENT);
      pos += 1;
      runStart = pos;
      continue;
    }

    pos += 1;
  }
  flushRun(length);
  return pieces.join("");
}
|
|
68
|
+
|
|
69
|
+
/**
 * Deep-walk a session event payload and sanitize every string value. Mirrors the
 * shape of the worker redaction deep-walk: objects, arrays, and nested
 * combinations are traversed; non-string leaves pass through untouched. Object
 * keys are sanitized too -- they are jsonb-constrained the same as values.
 *
 * Objects that define `toJSON` (Date, URL, Buffer, ...) are sanitized through
 * their `toJSON()` projection -- the value JSON serialization would use --
 * rather than through their own enumerable entries. Walking the entries of
 * such an object discards its data (a Date has none, so it would become `{}`).
 *
 * @param payload - Any payload value: a string, array, object, or other leaf.
 * @returns A sanitized copy for strings, arrays, and objects; other values are
 *   returned as-is.
 */
export function sanitizeEventPayload<T>(payload: T): T {
  if (typeof payload === "string") {
    return sanitizeEventString(payload) as unknown as T;
  }
  if (Array.isArray(payload)) {
    return payload.map((item) => sanitizeEventPayload(item)) as unknown as T;
  }
  if (payload && typeof payload === "object") {
    const serializable = payload as { toJSON?: unknown };
    if (typeof serializable.toJSON === "function") {
      const projected: unknown = serializable.toJSON.call(payload, "");
      // Guard against a toJSON that returns its own receiver: recursing on it
      // would never terminate, so fall through to the entry walk instead.
      if (projected !== payload) {
        return sanitizeEventPayload(projected) as unknown as T;
      }
    }
    const entries = Object.entries(payload as Record<string, unknown>).map(
      ([key, value]) => [sanitizeEventString(key), sanitizeEventPayload(value)] as const,
    );
    return Object.fromEntries(entries) as unknown as T;
  }
  return payload;
}
|