voidforge-build 23.10.0 → 23.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/.claude/agents/bashir-field-medic.md +1 -0
  2. package/dist/.claude/agents/coulson-release.md +3 -0
  3. package/dist/.claude/agents/irulan-historian.md +3 -0
  4. package/dist/.claude/agents/loki-chaos.md +1 -0
  5. package/dist/.claude/agents/picard-architecture.md +3 -0
  6. package/dist/.claude/agents/silver-surfer-herald.md +3 -0
  7. package/dist/.claude/agents/sisko-campaign.md +3 -0
  8. package/dist/.claude/commands/architect.md +38 -0
  9. package/dist/.claude/commands/campaign.md +2 -0
  10. package/dist/.claude/commands/gauntlet.md +11 -0
  11. package/dist/.claude/commands/git.md +49 -6
  12. package/dist/CHANGELOG.md +84 -0
  13. package/dist/CLAUDE.md +13 -4
  14. package/dist/VERSION.md +3 -1
  15. package/dist/docs/methods/AI_INTELLIGENCE.md +15 -0
  16. package/dist/docs/methods/BACKEND_ENGINEER.md +48 -0
  17. package/dist/docs/methods/CAMPAIGN.md +196 -1
  18. package/dist/docs/methods/DEVOPS_ENGINEER.md +16 -0
  19. package/dist/docs/methods/FORGE_KEEPER.md +18 -0
  20. package/dist/docs/methods/GAUNTLET.md +2 -0
  21. package/dist/docs/methods/QA_ENGINEER.md +46 -0
  22. package/dist/docs/methods/RELEASE_MANAGER.md +85 -0
  23. package/dist/docs/methods/SECURITY_AUDITOR.md +53 -0
  24. package/dist/docs/methods/SUB_AGENTS.md +90 -0
  25. package/dist/docs/methods/SYSTEMS_ARCHITECT.md +42 -2
  26. package/dist/docs/methods/TESTING.md +17 -0
  27. package/dist/docs/methods/TIME_VAULT.md +17 -0
  28. package/dist/docs/patterns/adr-verification-gate.md +80 -0
  29. package/dist/docs/patterns/ai-eval.ts +87 -0
  30. package/dist/docs/patterns/ai-prompt-safety.ts +242 -0
  31. package/dist/docs/patterns/audit-log.ts +132 -0
  32. package/dist/docs/patterns/llm-state-dedup.ts +246 -0
  33. package/dist/docs/patterns/middleware.ts +83 -0
  34. package/dist/docs/patterns/multi-tenant-pool-bypass.ts +134 -0
  35. package/dist/docs/patterns/multi-tenant-property-test.ts +127 -0
  36. package/dist/docs/patterns/refactor-extraction.md +96 -0
  37. package/dist/wizard/lib/project-init.js +57 -0
  38. package/package.json +1 -1
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Pattern: Audit Log (system-event NULL trap + integrity)
3
+ *
4
+ * Source: Field report #319 §6. `audit_log.org_id INTEGER NOT NULL DEFAULT 1`
5
+ * rejects explicit NULL inserts. Spec called for `org_id=NULL` for system
6
+ * events; code wrote `None`; PG raised IntegrityError; an `except Exception:
7
+ * pass` swallowed it; the audit row was silently lost on every system event.
8
+ *
9
+ * The audit table cannot be a system of record AND a tenant-scoped table at
10
+ * the same time. This pattern documents the two valid resolutions and the
11
+ * integrity properties any audit pipeline must hold.
12
+ *
13
+ * Pairs with /docs/patterns/financial-transaction.ts (hash-chained append)
14
+ * for higher-stakes audit trails.
15
+ */
16
+
17
+ // ── The Two Valid Patterns ────────────────────────────────────────────────
18
+
19
+ // Pattern 1: Schema relaxation — make org_id nullable, write NULL for system
20
+ // events. Most explicit. Visible in `\d audit_log`. Migration cost.
21
+ //
22
+ // ALTER TABLE audit_log ALTER COLUMN org_id DROP NOT NULL;
23
+ // -- Operators query: WHERE org_id IS NULL
24
+ //
25
+ // Pattern 2: Sentinel + tag — write the placeholder DEFAULT (e.g., 1) plus a
26
+ // `decisions.system_event = true` JSONB flag. Cheaper, reversible. Operators
27
+ // query: WHERE (decisions->>'system_event')::boolean = true.
28
+
29
+ // ── TypeScript implementation (Pattern 2 — sentinel + tag) ───────────────
30
+
31
+ export type AuditEntry = {
32
+ org_id: number; // Schema DEFAULT for system events; real org id otherwise
33
+ user_id: string | null; // null for system events
34
+ action: string;
35
+ resource_type: string;
36
+ resource_id: string | null;
37
+ decisions: AuditDecisions;
38
+ occurred_at: Date;
39
+ };
40
+
41
+ export type AuditDecisions = {
42
+ system_event?: true; // Tag for system-scope writes (Pattern 2)
43
+ reason?: string;
44
+ actor_role?: string;
45
+ // Free-form context — keep keys stable so operator queries don't drift
46
+ [key: string]: unknown;
47
+ };
48
+
49
+ const SYSTEM_ORG_ID_PLACEHOLDER = 1; // Must match the schema DEFAULT
50
+
51
+ export async function writeAudit(
52
+ db: { execute: (sql: string, params: unknown[]) => Promise<void> },
53
+ entry: Omit<AuditEntry, 'occurred_at'>,
54
+ ): Promise<void> {
55
+ // Mark system events explicitly. Pattern 2 invariant: every system_event=true
56
+ // row uses SYSTEM_ORG_ID_PLACEHOLDER as org_id.
57
+ if (entry.decisions.system_event && entry.org_id !== SYSTEM_ORG_ID_PLACEHOLDER) {
58
+ throw new Error(
59
+ `audit-log invariant: system_event=true requires org_id=${SYSTEM_ORG_ID_PLACEHOLDER}, got ${entry.org_id}`,
60
+ );
61
+ }
62
+
63
+ await db.execute(
64
+ `INSERT INTO audit_log (org_id, user_id, action, resource_type, resource_id, decisions, occurred_at)
65
+ VALUES ($1, $2, $3, $4, $5, $6, NOW())`,
66
+ [
67
+ entry.org_id,
68
+ entry.user_id,
69
+ entry.action,
70
+ entry.resource_type,
71
+ entry.resource_id,
72
+ JSON.stringify(entry.decisions),
73
+ ],
74
+ );
75
+ }
76
+
77
+ // Convenience wrappers — make system vs tenant calls obvious at the call site.
78
+
79
+ export const writeSystemAudit = (
80
+ db: Parameters<typeof writeAudit>[0],
81
+ entry: Omit<AuditEntry, 'org_id' | 'occurred_at' | 'user_id' | 'decisions'> & {
82
+ decisions: Omit<AuditDecisions, 'system_event'>;
83
+ },
84
+ ) =>
85
+ writeAudit(db, {
86
+ ...entry,
87
+ org_id: SYSTEM_ORG_ID_PLACEHOLDER,
88
+ user_id: null,
89
+ decisions: { ...entry.decisions, system_event: true },
90
+ });
91
+
/**
 * Tenant-scoped audit write: a real org and a real user performed the action.
 *
 * Rejects entries tagged `decisions.system_event`. `writeAudit` only checks
 * that tagged rows use the placeholder org id, so a real tenant whose org_id
 * happens to equal the placeholder could otherwise write a row that operator
 * queries on `decisions->>'system_event'` would report as a system event.
 *
 * @param db    Executor passed straight through to `writeAudit`.
 * @param entry Audit entry with a concrete `org_id` and `user_id`.
 * @returns Promise that resolves once the row has been written.
 * @throws Error (as a rejected promise) when `decisions.system_event` is set.
 */
export const writeTenantAudit = async (
  db: Parameters<typeof writeAudit>[0],
  entry: Omit<AuditEntry, 'occurred_at'> & { org_id: number; user_id: string },
): Promise<void> => {
  if (entry.decisions.system_event) {
    throw new Error(
      'audit-log invariant: writeTenantAudit must not set decisions.system_event; use writeSystemAudit for system events',
    );
  }
  await writeAudit(db, entry);
};
96
+
97
+ // ── Integrity properties (assert in tests) ────────────────────────────────
98
+ //
99
+ // 1. NEVER `try { ... } catch { /* ignore */ }` around audit writes.
100
+ // Audit-write failures are themselves the most important class of audit
101
+ // event. If the audit pipeline can fail silently, you have no audit.
102
+ //
103
+ // 2. Audit writes inside the same transaction as the action they describe.
104
+ // A separate transaction risks the action committing while the audit
105
+ // rolls back (or vice versa).
106
+ //
107
+ // 3. Append-only at the application layer (no UPDATE/DELETE on audit_log).
108
+ // Enforce via revoked grants on the runtime role:
109
+ // REVOKE UPDATE, DELETE ON audit_log FROM <runtime_role>;
110
+ //
111
+ // 4. Tests assert: writeSystemAudit + writeTenantAudit produce
112
+ // distinguishable rows. Operator query against `decisions->>'system_event'`
113
+ // must surface system events without false positives from real org=1.
114
+
115
+ // ── Anti-patterns ─────────────────────────────────────────────────────────
116
+ //
117
+ // - `org_id INTEGER NOT NULL DEFAULT N` + `INSERT ... VALUES (NULL, ...)`
118
+ // → IntegrityError. Pick Pattern 1 (drop NOT NULL) or Pattern 2 (write N
119
+ // + tag). Don't try to do both halfway.
120
+ //
121
+ // - System events written with a real user's `org_id` "for convenience."
122
+ // The audit trail conflates platform actions with tenant actions; legal
123
+ // discovery cannot separate them.
124
+ //
125
+ // - JSONB tag without a stable key. `decisions.systemEvent` vs
126
+ // `decisions.system_event` vs `decisions.is_system` — operator queries
127
+ // break across versions. Lock the key in this file and keep it.
128
+ //
129
+ // - Wave 3 convergence (field report #319): Riker, Kenobi, Hawkgirl, Loki
130
+ // each independently flagged the NULL trap. When 3+ reviewers agree on
131
+ // the same finding, it is real, not stylistic — promote to a pattern,
132
+ // not a one-off fix.
@@ -0,0 +1,246 @@
1
+ /**
2
+ * Pattern: LLM State Dedup — IDs are NOT keys
3
+ *
4
+ * Rule: LLM-emitted identifiers are display labels, not primary keys.
5
+ *
6
+ * Why: each LLM invocation is stateless from the model's perspective. Two
7
+ * cycles that propose the same fix will produce DIFFERENT id strings, even
8
+ * for substantively identical commands. The model has no memory of prior
9
+ * ids; it generates a fresh string from current context, drifts every cycle.
10
+ *
11
+ * Field report #330 (threadplex-ops): an hourly run asked Claude to emit
12
+ * `approval_needed[]` entries with an `id` field. The runtime keyed dedup
13
+ * on `id`. Over 5 hours of identical context, Claude emitted ids:
14
+ *
15
+ * `a3f9c2` (cycle 1)
16
+ * `a3f7c2` (cycle 2)
17
+ * `a3f7b2` (cycle 3)
18
+ * `a3f9c1` (cycle 4)
19
+ *
20
+ * Four proposals to stop the same container. Four Telegram approval cards.
21
+ * Zero collapse. The dedup key was wrong by construction.
22
+ *
23
+ * This pattern applies to ANY VoidForge project using an LLM as a decision
24
+ * engine that emits actionable items (approvals, tickets, tasks, queued ops).
25
+ *
26
+ * Agents: Hari Seldon (AI architecture), Bayta Darell (eval), Stark (backend)
27
+ */
28
+
29
+ import { createHash } from 'node:crypto'
30
+
31
+ // --- The rule ---
32
+
33
+ /**
34
+ * Dedup keys must be derived from the OPERATIVE CONTENT, not from the LLM's
35
+ * id field. The operative content is the part of the proposal that, if
36
+ * executed, would produce the same observable outcome.
37
+ *
38
+ * For shell commands: the canonical command string.
39
+ * For HTTP requests: (method, path, normalized body).
40
+ * For database operations: (table, primary key, op-type).
41
+ * For user notifications: (recipient, channel, message-hash).
42
+ */
43
+
44
+ export interface ProposalDedupKey {
45
+ /** Content-hash of the operative payload — the actual dedup key. */
46
+ contentHash: string
47
+
48
+ /**
49
+ * Optional looser key for command-string drift collapse — `docker stop X`
50
+ * and `docker compose stop X` collapse to the same (verb, target) tuple
51
+ * even though contentHash differs (`docker rm -f X` keys as a different verb).
52
+ */
53
+ logicalKey?: string
54
+
55
+ /** The LLM-emitted id, retained as a display label only. NEVER as primary key. */
56
+ displayId?: string
57
+ }
58
+
59
+ // --- Hash the operative content ---
60
+
/**
 * Hash the canonical form of a shell command for use as a dedup key.
 *
 * Canonicalization is whitespace-only: leading/trailing whitespace is trimmed
 * and every internal whitespace run (spaces, tabs, newlines) collapses to a
 * single space, so cosmetically-different spellings of one command collapse.
 * Quote characters are NOT normalized, and whitespace inside quoted arguments
 * is collapsed too — `echo "a  b"` and `echo "a b"` share a hash. Use the
 * result as a dedup key only; never execute the canonical string.
 *
 * @param command Raw command string from the proposal.
 * @returns First 12 hex characters of the SHA-256 digest of the canonical command.
 */
export function shellCommandHash(command: string): string {
  // A single collapse pass is sufficient: once whitespace runs are single
  // spaces, a further "quote followed by whitespace" rewrite cannot change
  // the string, so no such pass is applied.
  const canonical = command.trim().replace(/\s+/g, ' ')

  return createHash('sha256').update(canonical).digest('hex').slice(0, 12)
}
74
+
/**
 * Recursively rebuild a JSON-like value with object keys in sorted order, so
 * serialization does not depend on key insertion order at any depth.
 */
function canonicalizeJson(value: unknown): unknown {
  if (Array.isArray(value)) return value.map((item) => canonicalizeJson(item))
  if (value === null || typeof value !== 'object') return value

  // Honour toJSON (e.g. Date) the same way JSON.stringify would.
  const serializable = value as { toJSON?: unknown }
  if (typeof serializable.toJSON === 'function') {
    return canonicalizeJson((value as { toJSON: () => unknown }).toJSON())
  }

  const source = value as Record<string, unknown>
  // Null prototype so a literal "__proto__" key is stored as plain data.
  const sorted: Record<string, unknown> = Object.create(null)
  for (const key of Object.keys(source).sort()) {
    sorted[key] = canonicalizeJson(source[key])
  }
  return sorted
}

/**
 * For HTTP request proposals: hash (method, path, key-sorted body).
 *
 * Body keys are sorted at EVERY nesting level, so `{a: 1, b: 2}` and
 * `{b: 2, a: 1}` hash identically while bodies that differ only in nested
 * values still hash differently. (Passing the top-level key list as the
 * JSON.stringify replacer would act as an allowlist at all depths and
 * silently drop nested keys, collapsing distinct requests.)
 *
 * @param req Request proposal; `method` is upper-cased before hashing.
 * @returns First 12 hex characters of the SHA-256 digest of
 *          `METHOD path body`, where body is '' when `req.body` is absent.
 */
export function httpRequestHash(req: {
  method: string
  path: string
  body?: Record<string, unknown>
}): string {
  const sortedBody = req.body ? JSON.stringify(canonicalizeJson(req.body)) : ''
  const canonical = `${req.method.toUpperCase()} ${req.path} ${sortedBody}`
  return createHash('sha256').update(canonical).digest('hex').slice(0, 12)
}
90
+
91
+ // --- Logical-key fallback for command-string drift ---
92
+
// A timeout flag written with a separate numeric value, e.g. `-t 10`.
const DOCKER_TIMEOUT_FLAG = '(?:-t|--time|--timeout)\\s+\\d+'

// One pre-compiled matcher per verb, kept in priority order: the first verb
// whose pattern matches anywhere in the command wins.
const DOCKER_VERB_PATTERNS = ['stop', 'start', 'restart', 'rm', 'kill', 'pause'].map((verb) => ({
  verb,
  pattern: new RegExp(
    `\\bdocker\\s+(?:compose\\s+)?${verb}\\b\\s+` +
      // Skip flags. A timeout flag consumes its numeric value as well; the
      // negative lookahead stops the generic `-flag` branch from re-reading
      // such a flag and leaving its number to be captured as the target.
      `(?:(?:${DOCKER_TIMEOUT_FLAG}|(?!${DOCKER_TIMEOUT_FLAG}(?:\\s|$))-\\S+)\\s+)*` +
      // Target must start with a word character, so a trailing flag is never
      // mistaken for a container/service name.
      `(\\w[\\w.-]*)`,
    'i'
  ),
}))

/**
 * Some commands have multiple syntactic forms that produce the same outcome.
 * Extract a `verb:target` logical key so those forms collapse together.
 *
 * Examples:
 *   docker stop kometa-run           → 'stop:kometa-run'
 *   docker compose stop kometa-run   → 'stop:kometa-run'
 *   docker stop -t 10 kometa-run     → 'stop:kometa-run'
 *   docker rm -f kometa-run          → 'rm:kometa-run'
 *     (different verb, same target — keyed separately on purpose)
 *   docker rm -f                     → null (no target present)
 *
 * Limitation: only timeout flags with a separate numeric value are
 * understood; other flags that take a separate value (e.g. `-s SIGTERM`)
 * will have that value read as the target.
 *
 * @param command Shell command to inspect; matching is case-insensitive.
 * @returns `verb:target`, or null when no supported docker verb with a
 *          target is found.
 */
export function dockerLogicalKey(command: string): string | null {
  for (const { verb, pattern } of DOCKER_VERB_PATTERNS) {
    const m = command.match(pattern)
    if (m) return `${verb}:${m[1]}`
  }
  return null
}
111
+
112
+ // --- Lifecycle states must enumerate every in-flight status ---
113
+
114
+ /**
115
+ * Even with correct dedup keys, the snapshot used for dedup-comparison must
116
+ * cover ALL operator-visible in-flight states — not just `pending`.
117
+ *
118
+ * Field report #330: the threadplex-ops snapshot filtered only
119
+ * `status == "pending"`, missing `executing` and `interrupted` rows that
120
+ * were also operator-visible. The dedup key was correct but the snapshot
121
+ * was incomplete, producing the same duplication symptom.
122
+ *
123
+ * The lifecycle table below is the reference. Extend per-project.
124
+ */
125
+ export const PROPOSAL_LIFECYCLE_STATES = [
126
+ 'pending', // Awaiting operator approval
127
+ 'executing', // Operator approved; runtime executing the action
128
+ 'interrupted', // Execution paused (operator pause, system pause, retry-backoff)
129
+ 'completed', // Execution succeeded
130
+ 'failed', // Execution failed (terminal — operator must re-issue)
131
+ 'cancelled', // Operator cancelled before execution
132
+ 'expired', // Approval window timed out
133
+ ] as const
134
+
135
+ export type LifecycleState = typeof PROPOSAL_LIFECYCLE_STATES[number]
136
+
137
+ /** In-flight states the dedup snapshot must include to prevent duplicate proposals. */
138
+ export const IN_FLIGHT_STATES: readonly LifecycleState[] = [
139
+ 'pending',
140
+ 'executing',
141
+ 'interrupted',
142
+ ]
143
+
144
+ // --- AUTHORITY-style contract: tell the LLM the key shape ---
145
+
146
+ /**
147
+ * The LLM cannot enforce a dedup contract it doesn't know about. Document
148
+ * the contract in the agent's authority/instruction document so the LLM
149
+ * understands what "same target" means.
150
+ *
151
+ * Example AUTHORITY.md fragment:
152
+ *
153
+ * ## Approval Identifier Contract
154
+ *
155
+ * Each proposal you emit MUST include both:
156
+ *
157
+ * id — a human-readable display label. NOT a key. You may
158
+ * emit any short label that helps the operator scan.
159
+ *
160
+ * cmd_hash — sha256(command)[:12]. The runtime keys dedup on this.
161
+ * Two proposals with the same cmd_hash collapse into one
162
+ * approval card.
163
+ *
164
+ * The runtime also computes a logical_key from the command verb + target
165
+ * name. Proposals with the same logical_key are surfaced as a cluster
166
+ * even if cmd_hash differs (e.g., `docker stop X` and `docker compose stop X`
167
+ * share verb and target — operator sees both, decides once).
168
+ */
169
+
170
+ export const AUTHORITY_FRAGMENT_TEMPLATE = `
171
+ ## Approval Identifier Contract
172
+
173
+ Each proposal MUST include:
174
+
175
+ id — display label. Not a key. You may emit any short label.
176
+ cmd_hash — sha256(command)[:12]. The runtime keys dedup on this.
177
+
178
+ The runtime also computes a logical_key from (verb, target). Proposals
179
+ sharing logical_key are surfaced as a cluster even with different
180
+ cmd_hash values.
181
+ `.trim()
182
+
183
+ // --- Putting it together ---
184
+
185
+ export interface ApprovalProposal {
186
+ id: string // Display only — DO NOT USE AS KEY
187
+ cmdHash: string // Primary dedup key
188
+ logicalKey: string | null // Secondary cluster key
189
+ command: string
190
+ proposedAt: string // ISO timestamp
191
+ state: LifecycleState
192
+ }
193
+
194
+ export function dedupProposals(
195
+ newProposal: { id: string; command: string },
196
+ existing: ApprovalProposal[]
197
+ ): { duplicate: boolean; collapsedInto?: ApprovalProposal; logicalCluster?: ApprovalProposal[] } {
198
+ const cmdHash = shellCommandHash(newProposal.command)
199
+ const logicalKey = dockerLogicalKey(newProposal.command)
200
+
201
+ // Snapshot covers ALL in-flight states — not just pending
202
+ const inFlight = existing.filter((p) => IN_FLIGHT_STATES.includes(p.state))
203
+
204
+ // Hard duplicate: same cmd_hash
205
+ const exact = inFlight.find((p) => p.cmdHash === cmdHash)
206
+ if (exact) {
207
+ return { duplicate: true, collapsedInto: exact }
208
+ }
209
+
210
+ // Soft cluster: same logical_key, different command form
211
+ if (logicalKey) {
212
+ const cluster = inFlight.filter((p) => p.logicalKey === logicalKey)
213
+ if (cluster.length > 0) {
214
+ return { duplicate: false, logicalCluster: cluster }
215
+ }
216
+ }
217
+
218
+ return { duplicate: false }
219
+ }
220
+
221
+ // --- Anti-patterns ---
222
+
223
+ /* ANTI-PATTERN 1: LLM ids as primary keys
224
+ * `INSERT INTO approvals (id, ...) VALUES (?, ...)` where `id` is the
225
+ * LLM-emitted string. Two LLM calls with substantively identical input
226
+ * will produce different ids; the database rows do NOT collapse.
227
+ *
228
+ * Fix: store `cmd_hash` as the PK and `display_id` as a label column.
229
+ */
230
+
231
+ /* ANTI-PATTERN 2: Dedup snapshot filtered to a single state
232
+ * `SELECT * FROM approvals WHERE state = 'pending'` for dedup comparison.
233
+ * Misses `executing` and `interrupted` rows that are operator-visible.
234
+ *
235
+ * Fix: use IN_FLIGHT_STATES list. Document which states are excluded
236
+ * from dedup (typically `completed`, `failed`, `cancelled`, `expired`).
237
+ */
238
+
239
+ /* ANTI-PATTERN 3: Hash the LLM's whole emitted JSON
240
+ * `sha256(JSON.stringify(proposal))` includes display_id, timestamps,
241
+ * reasoning prose — all of which drift per cycle even when the action
242
+ * is identical. Hash explodes; collapse never happens.
243
+ *
244
+ * Fix: hash only the operative payload (the command, the request body,
245
+ * the target identifier — never the LLM's free-text fields).
246
+ */
@@ -154,6 +154,89 @@ export function withRequestLogging(
154
154
  }
155
155
  }
156
156
 
157
+ // --- Hot-path logging gate (fire-once / rate-limited) ---
158
+ //
159
+ // Source: Field report #319 §5. Stark's RlsDeadlineMiddleware originally
160
+ // emitted `logger.critical(...)` on every 503 — at 100 rps × 24h = 8.6M
161
+ // critical-level lines/day. No rate-limit, no fire-once. Would crater the
162
+ // log aggregator and Sentry quota.
163
+ //
164
+ // ANY middleware that emits log lines on a hot path (every request, every
165
+ // connection) MUST gate the emission. Two acceptable patterns:
166
+ //
167
+ // 1. Fire-once flag (preferred for state transitions): emit once when
168
+ // state changes, then suppress until reset. Pair with an audit row
169
+ // + Sentry capture inside the same fire-once branch.
170
+ // 2. Rate-limit window (sample-based): emit at most N per window via a
171
+ // token-bucket or last-emit-timestamp gate.
172
+ //
173
+ // Naked `logger.critical(...)` per-request is a denial-of-service vector
174
+ // against your own observability pipeline.
175
+
176
+ type FireOnceState = { fired: boolean; firedAt: number | null };
177
+ const fireOnceStates = new Map<string, FireOnceState>();
178
+
179
+ /**
180
+ * Fire-once gate. Returns true if the caller should emit; false if
181
+ * emission has already happened for this key (until reset()).
182
+ *
183
+ * Use for state-transition events (deadline tripped, circuit opened,
184
+ * degraded mode entered) where the climactic event matters once.
185
+ */
186
+ export function fireOnce(key: string): boolean {
187
+ const state = fireOnceStates.get(key) ?? { fired: false, firedAt: null };
188
+ if (state.fired) return false;
189
+ state.fired = true;
190
+ state.firedAt = Date.now();
191
+ fireOnceStates.set(key, state);
192
+ return true;
193
+ }
194
+
195
+ export function resetFireOnce(key: string): void {
196
+ fireOnceStates.delete(key);
197
+ }
198
+
199
+ /**
200
+ * Fixed-window rate limiter for hot-path logs. Returns true if the caller
201
+ * should emit; false if the current window's budget is already spent.
202
+ *
203
+ * Use for sampled logging where you want N emissions per window
204
+ * (e.g., 1 per minute, 10 per hour).
205
+ */
206
+ const tokenBuckets = new Map<string, { tokens: number; lastRefill: number }>();
207
+
208
+ export function shouldEmit(
209
+ key: string,
210
+ maxPerWindow: number,
211
+ windowMs: number,
212
+ ): boolean {
213
+ const now = Date.now();
214
+ const bucket = tokenBuckets.get(key) ?? { tokens: maxPerWindow, lastRefill: now };
215
+ const elapsed = now - bucket.lastRefill;
216
+ if (elapsed >= windowMs) {
217
+ bucket.tokens = maxPerWindow;
218
+ bucket.lastRefill = now;
219
+ }
220
+ if (bucket.tokens > 0) {
221
+ bucket.tokens -= 1;
222
+ tokenBuckets.set(key, bucket);
223
+ return true;
224
+ }
225
+ tokenBuckets.set(key, bucket);
226
+ return false;
227
+ }
228
+
229
+ // Usage example: 503 deadline middleware
230
+ //
231
+ // if (deadlinePassed) {
232
+ // if (fireOnce('rls-deadline-tripped')) {
233
+ // logger.fatal({ deadline_iso, evidence }, 'RLS migration deadline tripped');
234
+ // writeAuditRow({ action: 'rls_deadline_tripped', decisions: { ... } });
235
+ // Sentry.captureMessage('rls_deadline_tripped', 'fatal');
236
+ // }
237
+ // return new Response('Service Unavailable', { status: 503 });
238
+ // }
239
+
157
240
  // --- Rate limiting middleware ---
158
241
  // Simple in-memory rate limiter. Replace with Redis for multi-instance.
159
242
  const rateLimitMap = new Map<string, { count: number; resetAt: number }>()
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Pattern: Multi-Tenant Pool Bypass (pre-org-resolution scope)
3
+ *
4
+ * Source: Field report #316 §8 (Union Station, M-04c W2). FORCE RLS with a
5
+ * non-owner runtime role means every connection acquired from the tenant
6
+ * pool MUST have `app.current_org_id` set before the first query. But some
7
+ * code paths legitimately need cross-tenant access:
8
+ *
9
+ * - Auth pre-resolution (looking up which org a session belongs to)
10
+ * - System daemons (queue cleanup, retention sweeps, leader-elected work)
11
+ * - Admin endpoints (cross-tenant reports, ops tooling)
12
+ *
13
+ * These can't set org_id (they don't have one), so they need to bypass the
14
+ * tenant pool entirely and acquire from the admin pool. The
15
+ * `pre_org_resolution_scope` ContextVar wrapper makes this explicit and
16
+ * mechanically enforceable.
17
+ *
18
+ * The TS version below is illustrative; the canonical implementation in
19
+ * Union Station is Python (asyncpg). Same shape ports cleanly.
20
+ */
21
+
22
+ import { AsyncLocalStorage } from 'node:async_hooks';
23
+
24
+ // ── ContextVar / AsyncLocalStorage ────────────────────────────────────────
25
+
26
+ type TenantContext = {
27
+ org_id: number | null; // null when in pre-resolution scope
28
+ pre_resolution: boolean; // true ⇒ acquire from admin pool, not tenant pool
29
+ };
30
+
31
+ const tenantContext = new AsyncLocalStorage<TenantContext>();
32
+
33
+ // ── Tenant scope (per-request, normal path) ──────────────────────────────
34
+
35
+ export async function withTenant<T>(
36
+ org_id: number,
37
+ fn: () => Promise<T>,
38
+ ): Promise<T> {
39
+ return tenantContext.run({ org_id, pre_resolution: false }, fn);
40
+ }
41
+
42
+ // ── Pre-org-resolution scope (cross-tenant or auth lookup) ───────────────
43
+
44
+ export async function preOrgResolutionScope<T>(fn: () => Promise<T>): Promise<T> {
45
+ return tenantContext.run({ org_id: null, pre_resolution: true }, fn);
46
+ }
47
+
48
+ // ── Pool acquisition routes by ContextVar ─────────────────────────────────
49
+
50
+ import type { Pool, PoolClient } from 'pg'; // illustrative — real types vary
51
+
52
+ declare const tenantPool: Pool; // BYPASSRLS=f, RLS enforced
53
+ declare const adminPool: Pool; // BYPASSRLS=t, cross-tenant work
54
+
55
+ export async function acquireConnection(): Promise<PoolClient> {
56
+ const ctx = tenantContext.getStore();
57
+
58
+ if (!ctx) {
59
+ throw new Error(
60
+ 'acquireConnection called outside any tenant context. ' +
61
+ 'Wrap caller with withTenant(orgId, ...) or preOrgResolutionScope(...).',
62
+ );
63
+ }
64
+
65
+ if (ctx.pre_resolution) {
66
+ // Cross-tenant work — acquire from the admin pool.
67
+ return adminPool.connect();
68
+ }
69
+
70
+ // Normal request — acquire from the tenant pool. The pool callback is
71
+ // expected to SET app.current_org_id so RLS policies can reference it.
72
+ if (ctx.org_id === null) {
73
+ throw new Error(
74
+ 'Tenant context missing org_id outside pre_resolution scope. ' +
75
+ 'This indicates a callsite that should have called preOrgResolutionScope().',
76
+ );
77
+ }
78
+ return tenantPool.connect();
79
+ }
80
+
81
+ // ── Usage examples ────────────────────────────────────────────────────────
82
+
83
+ // 1. HTTP middleware (per-request)
84
+ //
85
+ // app.use(async (req, res, next) => {
86
+ // await withTenant(req.user.org_id, () => next());
87
+ // });
88
+ //
89
+ // 2. Daemon (cross-tenant queue cleanup)
90
+ //
91
+ // cron.schedule('*/5 * * * *', async () => {
92
+ // await preOrgResolutionScope(async () => {
93
+ // const conn = await acquireConnection(); // → admin pool
94
+ // try { await conn.query('DELETE FROM job_queue WHERE completed_at < NOW() - INTERVAL \'30 days\''); }
95
+ // finally { conn.release(); } // always return the connection, even if the query throws
96
+ // });
97
+ // });
98
+ //
99
+ // 3. Auth lookup (caller doesn't yet know org_id)
100
+ //
101
+ // async function resolveSession(sessionToken: string): Promise<{ org_id: number; user_id: string }> {
102
+ // return preOrgResolutionScope(async () => {
103
+ // const conn = await acquireConnection(); // → admin pool
104
+ // try {
105
+ // const row = await conn.query(
106
+ // 'SELECT org_id, user_id FROM sessions WHERE token = $1 AND expires_at > NOW()',
107
+ // [sessionToken],
108
+ // );
109
+ // return row.rows[0];
110
+ // } finally {
111
+ // conn.release();
112
+ // }
113
+ // });
114
+ // }
115
+
116
+ // ── Anti-patterns ─────────────────────────────────────────────────────────
117
+ //
118
+ // 1. Acquiring from the tenant pool in a daemon. Without org_id set, the RLS
119
+ // policy denies every query → daemon crashes on first tick. Or worse:
120
+ // the policy uses a fail-open arm and the daemon silently sees zero rows.
121
+ //
122
+ // 2. Bypassing FORCE RLS by hard-coding the connection string with the
123
+ // runtime role's password. The whole point of the admin pool is the
124
+ // BYPASSRLS=t identity — preserve that boundary.
125
+ //
126
+ // 3. preOrgResolutionScope wrapping per-request handlers. The middleware
127
+ // already set the tenant context; switching to admin pool there is a
128
+ // privilege escalation. preOrgResolutionScope is for code paths that
129
+ // legitimately don't have an org_id yet (or never will).
130
+ //
131
+ // 4. Forgetting to wrap lifespan startup. Field report #319 §2: 4 lifespan
132
+ // paths in Union Station's M-05 cutover failed-fast immediately because
133
+ // the RLS-strict role rejected unscoped queries. See BACKEND_ENGINEER.md
134
+ // "Lifespan & Daemon ContextVar Coverage" for the sweep checklist.