@garethdaine/agentops 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/.claude-plugin/plugin.json +10 -0
  2. package/LICENSE +21 -0
  3. package/README.md +410 -0
  4. package/agents/architecture-researcher.md +115 -0
  5. package/agents/code-critic.md +190 -0
  6. package/agents/delegation-router.md +40 -0
  7. package/agents/feature-researcher.md +117 -0
  8. package/agents/interrogator.md +11 -0
  9. package/agents/pitfalls-researcher.md +112 -0
  10. package/agents/plan-validator.md +173 -0
  11. package/agents/proposer.md +61 -0
  12. package/agents/security-reviewer.md +189 -0
  13. package/agents/skill-builder.md +43 -0
  14. package/agents/spec-compliance-reviewer.md +154 -0
  15. package/agents/stack-researcher.md +89 -0
  16. package/commands/build.md +766 -0
  17. package/commands/code-analysis.md +39 -0
  18. package/commands/code-field.md +22 -0
  19. package/commands/compliance-check.md +34 -0
  20. package/commands/configure.md +178 -0
  21. package/commands/cost-report.md +17 -0
  22. package/commands/enterprise/adr.md +78 -0
  23. package/commands/enterprise/brainstorm.md +461 -0
  24. package/commands/enterprise/design.md +203 -0
  25. package/commands/enterprise/dev-setup.md +136 -0
  26. package/commands/enterprise/docker-dev.md +229 -0
  27. package/commands/enterprise/e2e.md +233 -0
  28. package/commands/enterprise/feature.md +218 -0
  29. package/commands/enterprise/gap-analysis.md +204 -0
  30. package/commands/enterprise/handover.md +195 -0
  31. package/commands/enterprise/herd.md +152 -0
  32. package/commands/enterprise/knowledge.md +173 -0
  33. package/commands/enterprise/onboard.md +86 -0
  34. package/commands/enterprise/qa-check.md +80 -0
  35. package/commands/enterprise/reason.md +196 -0
  36. package/commands/enterprise/review.md +177 -0
  37. package/commands/enterprise/scaffold.md +153 -0
  38. package/commands/enterprise/status-report.md +101 -0
  39. package/commands/enterprise/tech-catalog.md +170 -0
  40. package/commands/enterprise/test-gen.md +138 -0
  41. package/commands/evolve.md +39 -0
  42. package/commands/flags.md +44 -0
  43. package/commands/interrogate.md +263 -0
  44. package/commands/lesson.md +15 -0
  45. package/commands/lessons.md +10 -0
  46. package/commands/plan.md +44 -0
  47. package/commands/prune.md +27 -0
  48. package/commands/star.md +17 -0
  49. package/commands/supply-chain-scan.md +44 -0
  50. package/commands/unicode-scan.md +63 -0
  51. package/commands/verify.md +41 -0
  52. package/commands/workflow.md +436 -0
  53. package/hooks/ai-guardrails.sh +114 -0
  54. package/hooks/audit-log.sh +26 -0
  55. package/hooks/auto-delegate.sh +45 -0
  56. package/hooks/auto-evolve.sh +22 -0
  57. package/hooks/auto-lesson.sh +26 -0
  58. package/hooks/auto-plan.sh +59 -0
  59. package/hooks/auto-test.sh +46 -0
  60. package/hooks/auto-verify.sh +30 -0
  61. package/hooks/budget-check.sh +24 -0
  62. package/hooks/code-field-preamble.sh +30 -0
  63. package/hooks/compliance-gate.sh +50 -0
  64. package/hooks/content-trust.sh +22 -0
  65. package/hooks/credential-redact.sh +23 -0
  66. package/hooks/delegation-trust.sh +15 -0
  67. package/hooks/detect-test-run.sh +19 -0
  68. package/hooks/enforcement-lib.sh +60 -0
  69. package/hooks/evolve-gate.sh +32 -0
  70. package/hooks/evolve-lib.sh +32 -0
  71. package/hooks/exfiltration-check.sh +67 -0
  72. package/hooks/failure-collector.sh +27 -0
  73. package/hooks/feature-flags.sh +67 -0
  74. package/hooks/file-provenance.sh +31 -0
  75. package/hooks/flag-utils.sh +36 -0
  76. package/hooks/hooks.json +145 -0
  77. package/hooks/injection-scan.sh +58 -0
  78. package/hooks/integrity-verify.sh +91 -0
  79. package/hooks/lessons-check.sh +17 -0
  80. package/hooks/lockfile-audit.sh +109 -0
  81. package/hooks/patterns-lib.sh +22 -0
  82. package/hooks/plan-gate.sh +18 -0
  83. package/hooks/redact-lib.sh +15 -0
  84. package/hooks/runtime-mode.sh +56 -0
  85. package/hooks/session-cleanup.sh +74 -0
  86. package/hooks/skill-validator.sh +28 -0
  87. package/hooks/standards-enforce.sh +106 -0
  88. package/hooks/star-gate.sh +93 -0
  89. package/hooks/star-preamble.sh +10 -0
  90. package/hooks/telemetry.sh +33 -0
  91. package/hooks/todo-prune.sh +84 -0
  92. package/hooks/unicode-firewall.sh +122 -0
  93. package/hooks/unicode-lib.sh +66 -0
  94. package/hooks/unicode-scan-session.sh +96 -0
  95. package/hooks/validate-command.sh +103 -0
  96. package/hooks/validate-env.sh +51 -0
  97. package/hooks/validate-path.sh +81 -0
  98. package/package.json +40 -0
  99. package/settings.json +6 -0
  100. package/templates/ai-config/tool-standards.md +56 -0
  101. package/templates/architecture/api-first.md +192 -0
  102. package/templates/architecture/auth-patterns.md +302 -0
  103. package/templates/architecture/caching-strategy.md +359 -0
  104. package/templates/architecture/database-patterns.md +347 -0
  105. package/templates/architecture/event-driven.md +252 -0
  106. package/templates/architecture/integration-patterns.md +185 -0
  107. package/templates/architecture/multi-tenancy.md +104 -0
  108. package/templates/architecture/service-boundaries.md +200 -0
  109. package/templates/build/brief-template.md +86 -0
  110. package/templates/build/summary-template.md +100 -0
  111. package/templates/build/task-plan-template.md +133 -0
  112. package/templates/communication/effort-estimate.md +54 -0
  113. package/templates/communication/incident-response.md +59 -0
  114. package/templates/communication/post-mortem.md +109 -0
  115. package/templates/communication/risk-register.md +43 -0
  116. package/templates/communication/sprint-demo-checklist.md +64 -0
  117. package/templates/communication/stakeholder-presentation-outline.md +84 -0
  118. package/templates/communication/technical-proposal.md +77 -0
  119. package/templates/delivery/deployment/deployment-checklist.md +49 -0
  120. package/templates/delivery/design/solution-design-checklist.md +37 -0
  121. package/templates/delivery/discovery/stakeholder-questions.md +33 -0
  122. package/templates/delivery/handover/knowledge-transfer-checklist.md +75 -0
  123. package/templates/delivery/handover/operational-runbook.md +117 -0
  124. package/templates/delivery/handover/support-escalation-matrix.md +56 -0
  125. package/templates/delivery/implementation/blocker-escalation-template.md +55 -0
  126. package/templates/delivery/implementation/sprint-planning-template.md +49 -0
  127. package/templates/delivery/implementation/task-decomposition-guide.md +59 -0
  128. package/templates/delivery/qa/test-plan-template.md +76 -0
  129. package/templates/delivery/qa/test-results-template.md +55 -0
  130. package/templates/delivery/qa/uat-signoff-template.md +44 -0
  131. package/templates/governance/codeowners.md +60 -0
  132. package/templates/integration/adapter-pattern.md +160 -0
  133. package/templates/scaffolds/env-validation.md +85 -0
  134. package/templates/scaffolds/error-handling.md +171 -0
  135. package/templates/scaffolds/graceful-shutdown.md +139 -0
  136. package/templates/scaffolds/health-check.md +109 -0
  137. package/templates/scaffolds/structured-logging.md +134 -0
  138. package/templates/standards/engineering-standards.md +413 -0
  139. package/templates/standards/standards-checklist.md +125 -0
  140. package/templates/tech-catalog.json +663 -0
  141. package/templates/utilities/project-detection.md +75 -0
  142. package/templates/utilities/requirements-collection.md +68 -0
  143. package/templates/utilities/template-rendering.md +81 -0
  144. package/templates/workflows/architecture-decision.md +90 -0
  145. package/templates/workflows/bug-investigation.md +83 -0
  146. package/templates/workflows/feature-implementation.md +80 -0
  147. package/templates/workflows/refactoring.md +83 -0
  148. package/templates/workflows/spike-exploration.md +82 -0
@@ -0,0 +1,359 @@
1
+ # Architecture Pattern: Caching Strategy
2
+
3
+ ## When to Use
4
+
5
+ - Read-heavy workloads where the same data is requested repeatedly
6
+ - Expensive computations or queries that can tolerate slightly stale results
7
+ - Reducing latency for frequently accessed resources
8
+ - Protecting backend services from traffic spikes
9
+
10
+ ## Pattern Description
11
+
12
+ Caching stores computed results closer to the consumer to avoid redundant work. Effective caching requires deliberate decisions about what to cache, where to cache it, how long entries remain valid, and how to invalidate them when the source data changes. The hardest part of caching is not adding it — it is invalidating it correctly.
13
+
14
+ ## Cache Layers
15
+
16
+ ```typescript
17
+ /**
18
+ * Layer 1: Application memory (fastest, per-process, lost on restart)
19
+ * Layer 2: Redis / Memcached (shared across processes, survives restarts)
20
+ * Layer 3: CDN (edge cache for static and semi-static content)
21
+ *
22
+ * Check layers in order: memory -> Redis -> origin
23
+ */
24
+
25
/**
 * Minimal contract shared by every cache tier in this document.
 * All operations are asynchronous so that in-process and remote
 * tiers can be used interchangeably.
 */
export interface CacheLayer {
  /** Resolves to the value stored under `key`, or `null` when nothing is stored. */
  get<T>(key: string): Promise<T | null>;

  /** Stores `value` under `key` with a time-to-live of `ttlSeconds` seconds. */
  set<T>(key: string, value: T, ttlSeconds: number): Promise<void>;

  /** Removes whatever is stored under `key`. */
  delete(key: string): Promise<void>;
}
30
+
31
/**
 * Two-tier cache: a fast per-process memory layer in front of a shared
 * Redis layer. Reads check memory first and fall back to Redis; writes and
 * deletes go to both layers.
 *
 * The memory layer is deliberately short-lived because it cannot be
 * invalidated from other processes. Both limits are configurable:
 *
 * @param memory               Per-process layer, checked first.
 * @param redis                Shared layer, checked on a memory miss.
 * @param promoteTtlSeconds    TTL used when a Redis hit is copied into memory (default 30).
 * @param maxMemoryTtlSeconds  Upper bound for the memory TTL on `set` (default 60).
 */
export class TieredCache implements CacheLayer {
  constructor(
    private memory: CacheLayer,
    private redis: CacheLayer,
    private promoteTtlSeconds = 30,
    private maxMemoryTtlSeconds = 60,
  ) {}

  /** Returns the cached value for `key`, or `null` when neither layer has it. */
  async get<T>(key: string): Promise<T | null> {
    // Check local memory first (sub-millisecond)
    const memResult = await this.memory.get<T>(key);
    if (memResult !== null) return memResult;

    // Fall back to Redis (1-2ms network hop)
    const redisResult = await this.redis.get<T>(key);
    if (redisResult === null) return null;

    // Promote to the memory layer with a short TTL so a stale copy
    // cannot outlive the shared entry by much.
    await this.memory.set(key, redisResult, this.promoteTtlSeconds);
    return redisResult;
  }

  /** Writes `value` to both layers; the memory TTL is capped at `maxMemoryTtlSeconds`. */
  async set<T>(key: string, value: T, ttlSeconds: number): Promise<void> {
    await Promise.all([
      this.memory.set(key, value, Math.min(ttlSeconds, this.maxMemoryTtlSeconds)),
      this.redis.set(key, value, ttlSeconds),
    ]);
  }

  /** Removes `key` from both layers. */
  async delete(key: string): Promise<void> {
    await Promise.all([
      this.memory.delete(key),
      this.redis.delete(key),
    ]);
  }
}
67
+ ```
68
+
69
+ ### In-Memory Cache Implementation
70
+
71
+ ```typescript
72
/**
 * Per-process in-memory cache with TTL expiry and a bounded size.
 *
 * Entries are kept in a Map, whose iteration order is insertion order.
 * Re-inserting an entry on every hit keeps the least recently used key at
 * the front, so eviction is a matter of deleting the first key.
 *
 * @param maxEntries Maximum number of entries held at once (default 10 000).
 *                   Values below 1 (or NaN) are treated as 1.
 */
export class MemoryCache implements CacheLayer {
  private store = new Map<string, { value: unknown; expiresAt: number }>();
  private readonly maxEntries: number;

  constructor(maxEntries = 10_000) {
    this.maxEntries = maxEntries >= 1 ? Math.floor(maxEntries) : 1;
  }

  /** Returns the live value for `key`, or `null` if it is missing or expired. */
  async get<T>(key: string): Promise<T | null> {
    const entry = this.store.get(key);
    if (!entry) return null;

    if (Date.now() > entry.expiresAt) {
      // Lazily drop expired entries on access.
      this.store.delete(key);
      return null;
    }

    // Move the entry to the back of the Map so it counts as recently used.
    this.store.delete(key);
    this.store.set(key, entry);
    return entry.value as T;
  }

  /** Stores `value` for `ttlSeconds`, evicting the least recently used entry when full. */
  async set<T>(key: string, value: T, ttlSeconds: number): Promise<void> {
    // Remove any previous entry first so an overwrite never triggers eviction.
    this.store.delete(key);

    if (this.store.size >= this.maxEntries) {
      const oldest = this.store.keys().next();
      if (!oldest.done) this.store.delete(oldest.value);
    }

    this.store.set(key, {
      value,
      expiresAt: Date.now() + ttlSeconds * 1000,
    });
  }

  /** Removes `key` if present. */
  async delete(key: string): Promise<void> {
    this.store.delete(key);
  }
}
96
+ ```
97
+
98
+ ## Cache Key Design
99
+
100
+ ```typescript
101
+ /**
102
+ * Cache key rules:
103
+ * 1. Include all parameters that affect the result
104
+ * 2. Prefix with entity type for easy bulk invalidation
105
+ * 3. Include tenant ID to prevent cross-tenant data leaks
106
+ * 4. Keep keys human-readable for debugging
107
+ * 5. Use a consistent separator (colon is conventional for Redis)
108
+ */
109
+
110
/**
 * Builds a colon-separated cache key of the form
 *   `<entity>:<tenantId>`                     (no id, no params)
 *   `<entity>:<tenantId>:<id>`                (single entity)
 *   `<entity>:<tenantId>:list:<k=v&k=v...>`   (list query, keys sorted)
 *
 * Every component is percent-encoded so that a separator character
 * (`:`, `&`, `=`) inside a value cannot make two different inputs produce
 * the same key. For example tenant "a:b" and tenant "a" with id "b" stay
 * distinct. Plain identifiers such as "tenant-abc" or "proj-123" are left
 * unchanged by the encoding.
 *
 * When both `id` and `params` are supplied, `id` wins and `params` is ignored.
 *
 * @param parts.entity   Entity type prefix (e.g. "project").
 * @param parts.tenantId Tenant the entry belongs to.
 * @param parts.id       Optional entity id; an empty string is treated as absent.
 * @param parts.params   Optional list-query parameters.
 * @returns The cache key string.
 */
export function buildCacheKey(parts: {
  entity: string;
  tenantId: string;
  id?: string;
  params?: Record<string, string | number | boolean>;
}): string {
  const encode = (component: string | number | boolean): string =>
    encodeURIComponent(String(component));

  const base = `${encode(parts.entity)}:${encode(parts.tenantId)}`;

  if (parts.id) {
    return `${base}:${encode(parts.id)}`;
  }

  const params = parts.params;
  if (params) {
    // Deterministic stringification: sort keys so equal queries share a key
    const query = Object.keys(params)
      .sort()
      .map((name) => `${encode(name)}=${encode(params[name])}`)
      .join('&');
    return `${base}:list:${query}`;
  }

  return base;
}
130
+
131
+ // Examples:
132
+ // "project:tenant-abc:proj-123"
133
+ // "project:tenant-abc:list:page=1&status=active"
134
+ ```
135
+
136
+ ## Invalidation Strategies
137
+
138
+ ### TTL-Based (Simplest)
139
+
140
+ ```typescript
141
+ /**
142
+ * Set a time-to-live and accept that data may be stale
143
+ * for up to that duration. Appropriate when eventual
144
+ * consistency is acceptable.
145
+ */
146
// Entries written with this TTL may be served stale for up to five minutes.
const FIVE_MINUTES_IN_SECONDS = 300;
await cache.set('project:tenant-1:proj-123', project, FIVE_MINUTES_IN_SECONDS);
147
+ ```
148
+
149
+ ### Event-Based Invalidation (Strongest Consistency)
150
+
151
+ ```typescript
152
+ /**
153
+ * Invalidate cache entries when the underlying data changes.
154
+ * Requires an event bus or change data capture pipeline.
155
+ */
156
// On every `project.updated` event, drop the cached project and then every
// cached list for the same tenant.
eventBus.subscribe('project.updated', async (event) => {
  const { tenantId, projectId } = event.data;

  const entryKey = buildCacheKey({ entity: 'project', tenantId, id: projectId });
  await cache.delete(entryKey);

  // List results may contain the updated project, so clear them as well
  const tenantListPattern = `project:${tenantId}:list:*`;
  await redis.deletePattern(tenantListPattern);
});
167
+ ```
168
+
169
+ ### Write-Through
170
+
171
+ ```typescript
172
+ /**
173
+ * Update the cache synchronously on every write.
174
+ * The cache is always consistent but writes are slower.
175
+ */
176
/**
 * Repository that refreshes the cache as part of every write: the row is
 * updated in the database first, then the returned record is stored in the
 * cache before the call resolves.
 */
export class WriteThroughRepository {
  constructor(
    private db: DatabaseClient,
    private cache: CacheLayer,
  ) {}

  /**
   * Persists `data` for project `id` and caches the stored record for 10 minutes.
   *
   * @returns The record returned by the database update.
   */
  async update(tenantId: string, id: string, data: UpdateInput): Promise<Project> {
    const cacheTtlSeconds = 600;

    const saved = await this.db.update('projects', id, data);
    const cacheKey = buildCacheKey({ entity: 'project', tenantId, id });
    await this.cache.set(cacheKey, saved, cacheTtlSeconds);

    return saved;
  }
}
189
+ ```
190
+
191
+ ### Write-Behind (Write-Back)
192
+
193
+ ```typescript
194
+ /**
195
+ * Write to the cache immediately, persist to the database asynchronously.
196
+ * Higher write throughput but risk of data loss if the cache fails
197
+ * before the database write completes.
198
+ */
199
/**
 * Repository that applies writes to the cache first and persists them to the
 * database asynchronously through a queue.
 *
 * `db` is accepted for parity with the other repositories; this class does
 * not use it directly because the write is carried out by the queue consumer.
 */
export class WriteBehindRepository {
  constructor(
    private db: DatabaseClient,
    private cache: CacheLayer,
    private writeQueue: Queue,
  ) {}

  /**
   * Applies `data` to the cached project and queues the database write.
   *
   * The update payload may be partial, so it is merged over the currently
   * cached record instead of replacing it. Replacing it would make later
   * reads see an object that is missing every unchanged field. If nothing is
   * cached yet, only the supplied fields (plus `id` and `updatedAt`) are cached.
   *
   * If enqueueing fails, the optimistic cache entry is removed again so the
   * cache never holds a value that will not be persisted.
   */
  async update(tenantId: string, id: string, data: UpdateInput): Promise<void> {
    const key = buildCacheKey({ entity: 'project', tenantId, id });

    // Update cache immediately (fast path)
    const existing = await this.cache.get<Record<string, unknown>>(key);
    const updated = {
      ...(existing ?? {}),
      ...data,
      id,
      updatedAt: new Date().toISOString(),
    };
    await this.cache.set(key, updated, 600);

    // Queue database write for async processing
    try {
      await this.writeQueue.add('db-write', {
        table: 'projects',
        id,
        data,
      });
    } catch (error) {
      // The write will never reach the database: undo the cache update
      await this.cache.delete(key);
      throw error;
    }
  }
}
221
+ ```
222
+
223
+ ## Stampede Prevention
224
+
225
+ When a popular cache entry expires, many concurrent requests can hit the database simultaneously.
226
+
227
+ ### Mutex Lock
228
+
229
+ ```typescript
230
+ /**
231
+ * Only one request computes the value while others wait.
232
+ */
233
// Deletes the lock only if it still holds our token, so a slow worker whose
// lock already expired cannot remove a lock that now belongs to someone else.
const RELEASE_LOCK_SCRIPT = `
if redis.call("get", KEYS[1]) == ARGV[1] then
  return redis.call("del", KEYS[1])
end
return 0
`;

/**
 * Returns the cached value for `key`, computing it at most once across
 * processes while the entry is missing.
 *
 * One caller takes a Redis lock (`lock:<key>`, 10 second expiry) and
 * computes the value; the others poll the cache every 50 ms. Waiting is
 * bounded: after `maxWaitMs` a waiter stops polling and computes the value
 * itself, so a crashed or very slow lock holder cannot block callers forever.
 *
 * @param cache      Cache that stores the computed value.
 * @param redis      Redis client used for the lock.
 *                   NOTE(review): `eval(script, { keys, arguments })` assumes the
 *                   node-redis v4 client, matching the `set` options used here. Confirm.
 * @param key        Cache key to read or populate.
 * @param computeFn  Produces the value on a miss.
 * @param ttlSeconds TTL for the stored value.
 * @param maxWaitMs  Maximum time to wait for another process (default 5000).
 * @returns The cached or freshly computed value.
 */
export async function getWithMutex<T>(
  cache: CacheLayer,
  redis: RedisClient,
  key: string,
  computeFn: () => Promise<T>,
  ttlSeconds: number,
  maxWaitMs = 5_000,
): Promise<T> {
  const lockKey = `lock:${key}`;
  const deadline = Date.now() + maxWaitMs;

  for (;;) {
    const cached = await cache.get<T>(key);
    if (cached !== null) return cached;

    // A per-attempt token identifies this caller as the lock owner
    const token = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const acquired = await redis.set(lockKey, token, { NX: true, EX: 10 });

    if (acquired) {
      try {
        const value = await computeFn();
        await cache.set(key, value, ttlSeconds);
        return value;
      } finally {
        await redis.eval(RELEASE_LOCK_SCRIPT, { keys: [lockKey], arguments: [token] });
      }
    }

    if (Date.now() >= deadline) {
      // Waited long enough: compute without the lock rather than hang
      const value = await computeFn();
      await cache.set(key, value, ttlSeconds);
      return value;
    }

    // Another process is computing: wait and retry
    await sleep(50);
  }
}
260
+ ```
261
+
262
+ ### Probabilistic Early Expiration
263
+
264
+ ```typescript
265
+ /**
266
+ * Each request has a small probability of recomputing the value
267
+ * before it actually expires, spreading the refresh load over time.
268
+ *
269
+ * XFetch algorithm: recompute with probability that increases
270
+ * as the entry approaches its expiry time.
271
+ */
272
/**
 * Reads `key`, occasionally recomputing it shortly before it expires so that
 * refreshes are spread out instead of all happening at the expiry instant.
 *
 * Decision rule (XFetch): recompute when
 *   timeRemaining <= -deltaMs * beta * ln(random())
 * `-ln(random())` is usually small and only rarely large, so a fresh entry is
 * almost never recomputed and the chance rises as expiry approaches. An entry
 * that is already expired is always recomputed.
 *
 * @param cache      Cache that also exposes `getWithMetadata`, returning the
 *                   value together with its absolute expiry time in ms.
 * @param key        Cache key.
 * @param computeFn  Produces a fresh value.
 * @param ttlSeconds TTL for the stored value.
 * @param options.beta     Values above 1 favour earlier recomputation (default 1).
 * @param options.deltaMs  Expected cost of `computeFn` in ms; defaults to 10% of the TTL.
 * @param options.random   Source of uniform random numbers in [0, 1) (default `Math.random`).
 * @returns The cached or freshly computed value.
 */
export async function getWithEarlyExpiry<T>(
  cache: CacheLayer & {
    getWithMetadata<V>(key: string): Promise<{ value: V; expiresAt: number } | null>;
  },
  key: string,
  computeFn: () => Promise<T>,
  ttlSeconds: number,
  options: { beta?: number; deltaMs?: number; random?: () => number } = {},
): Promise<T> {
  const {
    beta = 1,
    deltaMs = ttlSeconds * 1000 * 0.1,
    random = Math.random,
  } = options;

  const entry = await cache.getWithMetadata<T>(key);

  if (entry !== null) {
    const timeRemaining = entry.expiresAt - Date.now();

    // random() === 0 gives ln(0) = -Infinity, so the window becomes
    // +Infinity and the entry is recomputed, which is the safe outcome.
    const earlyWindowMs = -deltaMs * beta * Math.log(random());

    const isFresh = timeRemaining > 0 && timeRemaining > earlyWindowMs;
    if (isFresh) {
      return entry.value;
    }
  }

  const value = await computeFn();
  await cache.set(key, value, ttlSeconds);
  return value;
}
299
+ ```
300
+
301
+ ## Cache Warming
302
+
303
+ ```typescript
304
+ /**
305
+ * Pre-populate the cache on application startup or deployment
306
+ * to avoid cold-start latency for critical paths.
307
+ */
308
/**
 * Loads the active projects of the 100 most recently active tenants into the
 * cache, each with a 10 minute TTL.
 *
 * Projects for all selected tenants are fetched with one batched query
 * instead of one query per tenant, which avoids an N+1 pattern on startup.
 *
 * @param db    Database client; `query` is expected to resolve to an array of rows.
 * @param cache Cache to populate.
 */
export async function warmCache(
  db: DatabaseClient,
  cache: CacheLayer,
): Promise<void> {
  logger.info('Warming cache...');

  // Warm the most frequently accessed entities
  const activeTenants = await db.query<{ id: string }>(
    'SELECT id FROM tenants WHERE status = $1 ORDER BY last_active_at DESC LIMIT 100',
    ['active'],
  );

  let warmedProjects = 0;

  if (activeTenants.length > 0) {
    const tenantIds = activeTenants.map((tenant) => tenant.id);

    // SELECT * is intentional: the whole row is what gets cached.
    // NOTE(review): assumes the client binds a JS array as a Postgres array
    // parameter, as node-postgres does. Confirm for this DatabaseClient.
    const projects = await db.query<{ id: string; tenant_id: string }>(
      'SELECT * FROM projects WHERE tenant_id = ANY($1::text[]) AND status = $2',
      [tenantIds, 'active'],
    );

    // Sequential writes keep the load on the cache bounded during startup
    for (const project of projects) {
      const key = buildCacheKey({
        entity: 'project',
        tenantId: project.tenant_id,
        id: project.id,
      });
      await cache.set(key, project, 600);
    }
    warmedProjects = projects.length;
  }

  logger.info('Cache warming complete', {
    tenants: activeTenants.length,
    projects: warmedProjects,
  });
}
340
+
341
+ // Call on startup, after migrations
342
+ await warmCache(db, cache);
343
+ ```
344
+
345
+ ## Trade-offs
346
+
347
+ - **Consistency vs latency:** Longer TTLs mean faster reads but staler data. Event-based invalidation is more consistent but adds infrastructure complexity.
348
+ - **Memory cost:** Caching everything is wasteful. Cache hot data and let cold data go to origin.
349
+ - **Write-through vs write-behind:** Write-through is consistent but adds write latency. Write-behind is fast but risks data loss.
350
+ - **Tiered caching complexity:** Each layer adds operational surface area. Start with one layer and add more only when measurements justify it.
351
+
352
+ ## Common Pitfalls
353
+
354
+ 1. **No invalidation strategy** — Setting a TTL and hoping for the best leads to stale data bugs that are hard to reproduce.
355
+ 2. **Cache keys missing a parameter** — If two different queries produce the same cache key, one overwrites the other with wrong data.
356
+ 3. **Caching errors** — A failed database query cached for 5 minutes means 5 minutes of errors. Never cache error responses.
357
+ 4. **Unbounded memory cache** — Without a max size and eviction policy (LRU), the process leaks memory. Set limits.
358
+ 5. **Cross-tenant cache pollution** — Always include tenant ID in the cache key. A missing tenant prefix leaks data across tenants.
359
+ 6. **Ignoring serialisation cost** — Serialising and deserialising large objects can negate the performance benefit of caching. Measure end-to-end latency.
@@ -0,0 +1,347 @@
1
+ # Architecture Pattern: Database Patterns
2
+
3
+ ## When to Use
4
+
5
+ - Any application with persistent state beyond trivial key-value storage
6
+ - Systems that require schema evolution over time without downtime
7
+ - Read-heavy workloads that need query optimisation and replica routing
8
+ - Multi-service architectures where connection management becomes critical
9
+
10
+ ## Pattern Description
11
+
12
+ Database patterns cover the full lifecycle of data management: how schemas evolve through migrations, how connections are pooled and managed, how queries are optimised, and how read/write workloads are distributed. Getting these fundamentals right prevents the most common class of production incidents.
13
+
14
+ ## Migration Management
15
+
16
+ Migrations must be versioned, reversible, and safe to run against a live database.
17
+
18
+ ```typescript
19
+ /**
20
+ * Migration file naming convention:
21
+ * YYYYMMDDHHMMSS_descriptive_name.ts
22
+ * 20250315120000_add_projects_table.ts
23
+ *
24
+ * Each migration has an up() and down() function.
25
+ * down() must perfectly reverse up().
26
+ */
27
+
28
+ // 20250315120000_add_projects_table.ts
29
/**
 * Creates the `projects` table and the composite index that serves the most
 * common lookup (projects of a tenant filtered by status).
 */
export async function up(db: DatabaseClient): Promise<void> {
  await db.schema.createTable('projects', (table) => {
    table.uuid('id').primary().defaultTo(db.fn.uuid());
    // No standalone index on tenant_id: the composite index below starts
    // with tenant_id and already serves tenant-only lookups. A second index
    // would only add write and storage cost.
    table.text('tenant_id').notNullable();
    table.text('name').notNullable();
    table.text('status').notNullable().defaultTo('active');
    table.timestamp('created_at').notNullable().defaultTo(db.fn.now());
    table.timestamp('updated_at').notNullable().defaultTo(db.fn.now());
  });

  // Composite index for the most common query pattern
  await db.schema.raw(
    'CREATE INDEX idx_projects_tenant_status ON projects(tenant_id, status)',
  );
}

/**
 * Reverses `up()`. Dropping the table also drops its indexes, so the
 * composite index needs no separate statement.
 */
export async function down(db: DatabaseClient): Promise<void> {
  await db.schema.dropTableIfExists('projects');
}
48
+ ```
49
+
50
+ ### Safe Migration Practices
51
+
52
+ ```typescript
53
+ /**
54
+ * Rules for zero-downtime migrations:
55
+ *
56
+ * 1. NEVER rename a column in one step. Instead:
57
+ * Step 1: Add new column, backfill data
58
+ * Step 2: Deploy code that writes to both columns
59
+ * Step 3: Deploy code that reads from new column only
60
+ * Step 4: Drop old column
61
+ *
62
+ * 2. NEVER add a NOT NULL column without a default value
63
+ * to an existing table with data.
64
+ *
65
+ * 3. Adding an index on a large table — use CONCURRENTLY:
66
+ */
67
/** Adds an index on `orders.created_at` without blocking writes to the table. */
export async function up(db: DatabaseClient): Promise<void> {
  // CREATE INDEX CONCURRENTLY does not lock the table for writes
  await db.schema.raw(
    'CREATE INDEX CONCURRENTLY idx_orders_created ON orders(created_at)',
  );
}

/**
 * Reverses `up()`. The drop is also done CONCURRENTLY so that rolling back
 * does not block writes either.
 */
export async function down(db: DatabaseClient): Promise<void> {
  await db.schema.raw(
    'DROP INDEX CONCURRENTLY IF EXISTS idx_orders_created',
  );
}

// CONCURRENTLY index operations cannot run inside a transaction,
// so mark this migration as non-transactional if your tool supports it.
export const config = { transaction: false };
77
+ ```
78
+
79
+ ## Connection Pooling
80
+
81
+ ```typescript
82
+ /**
83
+ * Connection pool sizing formula (PostgreSQL):
84
+ * pool_size = (core_count * 2) + effective_spindle_count
85
+ *
86
+ * For a typical 4-core cloud instance with SSDs: pool_size ~= 10
87
+ * Each Node.js process gets its own pool. With 4 processes: 10 / 4 = 2-3 per process.
88
+ */
89
/**
 * Builds a connection pool from `config`, adding fixed sizing and timeout
 * settings on top of the connection details.
 */
export function createPool(config: DatabaseConfig): Pool {
  const { host, port, database, user, password } = config;

  const connection = { host, port, database, user, password };

  // Pool sizing
  const sizing = {
    min: 2, // Minimum idle connections
    max: 10, // Maximum connections per process
    idleTimeoutMillis: 30_000, // Close idle connections after 30s
    connectionTimeoutMillis: 5_000, // Fail fast if no connection available
  };

  // Statement timeout to prevent runaway queries
  const limits = { statement_timeout: 30_000 };

  return new Pool({ ...connection, ...sizing, ...limits });
}
107
+
108
+ // Health check: verify pool is functional
109
/**
 * Runs a trivial query against the pool.
 *
 * @returns `true` only when the query succeeds and returns `ok = 1`;
 *          any error is reported as `false` rather than thrown.
 */
export async function checkDatabaseHealth(pool: Pool): Promise<boolean> {
  let healthy = false;
  try {
    const probe = await pool.query('SELECT 1 AS ok');
    healthy = probe.rows[0]?.ok === 1;
  } catch {
    healthy = false;
  }
  return healthy;
}
117
+ ```
118
+
119
+ ## Query Optimisation Checklist
120
+
121
+ ```typescript
122
+ /**
123
+ * Before deploying a new query to production:
124
+ *
125
+ * 1. Run EXPLAIN ANALYZE on a representative dataset
126
+ * 2. Check for sequential scans on large tables (Seq Scan)
127
+ * 3. Verify index usage (Index Scan or Index Only Scan)
128
+ * 4. Check estimated vs actual row counts — large discrepancies
129
+ * mean stale statistics (run ANALYZE)
130
+ * 5. Look for nested loops with high row counts
131
+ * 6. Check for implicit type casts that prevent index usage
132
+ */
133
+
134
+ // Example: detecting slow queries at runtime
135
/**
 * Runs a query and logs a warning when it takes longer than the threshold.
 *
 * Failed queries are timed too. A query cancelled by a statement timeout is
 * by definition a slow one, and would otherwise never show up in the
 * slow-query log.
 *
 * @param pool            Pool to run the query on.
 * @param sql             SQL text (logged when the query is slow).
 * @param params          Bound parameters (not logged).
 * @param label           Short name identifying the query in logs.
 * @param slowThresholdMs Duration in ms above which a warning is logged (default 100).
 * @returns The result rows.
 * @throws Whatever `pool.query` rejects with, unchanged.
 */
export function queryWithTiming<T>(
  pool: Pool,
  sql: string,
  params: unknown[],
  label: string,
  slowThresholdMs = 100,
): Promise<T[]> {
  const start = performance.now();

  const warnIfSlow = (failed: boolean): void => {
    const duration = performance.now() - start;
    if (duration > slowThresholdMs) {
      logger.warn('Slow query detected', { label, duration, sql, failed });
    }
  };

  return pool.query(sql, params).then(
    (result) => {
      warnIfSlow(false);
      return result.rows as T[];
    },
    (error) => {
      warnIfSlow(true);
      throw error;
    },
  );
}
150
+ ```
151
+
152
+ ## Indexing Strategy
153
+
154
+ ```typescript
155
+ /**
156
+ * Index type selection guide (PostgreSQL):
157
+ *
158
+ * B-tree (default):
159
+ * - Equality and range queries (=, <, >, BETWEEN)
160
+ * - ORDER BY, GROUP BY
161
+ * - Most common choice
162
+ *
163
+ * GIN (Generalized Inverted Index):
164
+ * - JSONB containment (@>, ?)
165
+ * - Full-text search (tsvector)
166
+ * - Array containment (@>, &&)
167
+ *
168
+ * GiST (Generalized Search Tree):
169
+ * - Geometric/spatial data
170
+ * - Range types (overlaps, contains)
171
+ * - Nearest-neighbour searches
172
+ *
173
+ * BRIN (Block Range Index):
174
+ * - Very large tables where values correlate with physical order
175
+ * - Time-series data (created_at on append-only tables)
176
+ * - Tiny index size compared to B-tree
177
+ */
178
+
179
+ // Practical index examples
180
/** Creates one example index of each kind discussed above. */
export async function up(db: DatabaseClient): Promise<void> {
  // Partial B-tree: only rows that are not yet completed are indexed, which
  // keeps the index small when most orders end up completed. Queries must
  // repeat the same predicate for the planner to use it.
  await db.schema.raw(
    'CREATE INDEX idx_orders_status ON orders(status) WHERE status <> \'completed\'',
  );

  // GIN: querying JSONB metadata
  await db.schema.raw(
    'CREATE INDEX idx_projects_metadata ON projects USING gin(metadata jsonb_path_ops)',
  );

  // Composite index: column order matters, so put equality columns first
  await db.schema.raw(
    'CREATE INDEX idx_events_tenant_time ON events(tenant_id, created_at DESC)',
  );
}

/** Reverses `up()` by dropping the three indexes in reverse creation order. */
export async function down(db: DatabaseClient): Promise<void> {
  await db.schema.raw('DROP INDEX IF EXISTS idx_events_tenant_time');
  await db.schema.raw('DROP INDEX IF EXISTS idx_projects_metadata');
  await db.schema.raw('DROP INDEX IF EXISTS idx_orders_status');
}
196
+ ```
197
+
198
+ ## Read Replica Routing
199
+
200
+ ```typescript
201
+ /**
202
+ * Route read-only queries to replicas, writes to the primary.
203
+ * Beware of replication lag — recently written data may not be
204
+ * available on replicas immediately.
205
+ */
206
/**
 * Sends writes to the primary and spreads reads across the replicas in
 * round-robin order. When no replicas are configured, reads go to the
 * primary instead of failing.
 */
export class RoutingDatabase {
  constructor(
    private primary: Pool,
    private replicas: Pool[],
  ) {}

  // Index of the replica that will serve the next read
  private nextReplicaIndex = 0;

  /** Route to primary for writes */
  async write<T>(sql: string, params: unknown[]): Promise<T[]> {
    const result = await this.primary.query(sql, params);
    return result.rows as T[];
  }

  /** Round-robin across read replicas; falls back to the primary when there are none */
  async read<T>(sql: string, params: unknown[]): Promise<T[]> {
    const replicaCount = this.replicas.length;
    if (replicaCount === 0) {
      // `index % 0` is NaN, which would select `undefined` and crash
      return this.readFromPrimary<T>(sql, params);
    }

    // Keep the counter inside [0, replicaCount) so it never grows unbounded
    const index = this.nextReplicaIndex % replicaCount;
    this.nextReplicaIndex = (index + 1) % replicaCount;

    const result = await this.replicas[index].query(sql, params);
    return result.rows as T[];
  }

  /**
   * Read from primary when you need to read-your-own-writes.
   * Use sparingly, because this adds load to the primary.
   */
  async readFromPrimary<T>(sql: string, params: unknown[]): Promise<T[]> {
    const result = await this.primary.query(sql, params);
    return result.rows as T[];
  }
}
237
+ ```
238
+
239
+ ## Advisory Locks
240
+
241
+ ```typescript
242
+ /**
243
+ * Advisory locks for application-level mutual exclusion.
244
+ * Use when you need to prevent concurrent execution of a
245
+ * critical section across multiple processes.
246
+ */
247
/**
 * Runs `fn` while holding a PostgreSQL session-level advisory lock.
 *
 * The lock is taken with `pg_try_advisory_lock`, which does not wait: if
 * another session holds the lock, a `ConflictError` is thrown immediately
 * and `fn` is not called.
 *
 * @param pool   Pool to borrow the locking connection from.
 * @param lockId Application-chosen lock identifier.
 * @param fn     Critical section to run under the lock.
 * @returns Whatever `fn` resolves to.
 * @throws ConflictError when the lock is held elsewhere; otherwise whatever `fn` throws.
 */
export async function withAdvisoryLock<T>(
  pool: Pool,
  lockId: number,
  fn: () => Promise<T>,
): Promise<T> {
  const client = await pool.connect();
  let acquired = false;

  try {
    // pg_try_advisory_lock returns immediately (non-blocking)
    const lockResult = await client.query(
      'SELECT pg_try_advisory_lock($1) AS acquired',
      [lockId],
    );
    acquired = Boolean(lockResult.rows[0].acquired);

    if (!acquired) {
      throw new ConflictError('Could not acquire lock — another process holds it');
    }

    return await fn();
  } finally {
    // Only unlock what we actually hold, and always give the client back
    let unlockFailed = false;
    if (acquired) {
      try {
        await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
      } catch {
        unlockFailed = true;
      }
    }
    // If the unlock failed, ask the pool to destroy the connection: closing
    // the session releases its advisory locks, and the outcome of `fn` is
    // not masked by the unlock error.
    // NOTE(review): relies on node-postgres treating a truthy argument to
    // `release` as "destroy this client". Confirm for the pool in use.
    client.release(unlockFailed);
  }
}
271
+
272
+ // Usage: prevent concurrent migration runs
273
// Every process must use the same lock id for the lock to exclude the others.
const MIGRATION_LOCK_ID = 1001;

const migrateUnderLock = async (): Promise<void> => {
  await runPendingMigrations();
};

await withAdvisoryLock(pool, MIGRATION_LOCK_ID, migrateUnderLock);
276
+ ```
277
+
278
+ ## Transaction Isolation Levels
279
+
280
+ ```typescript
281
+ /**
282
+ * Isolation levels (from least to most strict):
283
+ *
284
+ * READ COMMITTED (PostgreSQL default):
285
+ * Each statement sees only committed data. Good for most workloads.
286
+ *
287
+ * REPEATABLE READ:
288
+ * The transaction sees a snapshot from its start. Use for reports
289
+ * or calculations that read the same data multiple times.
290
+ *
291
+ * SERIALIZABLE:
292
+ * Full isolation — transactions behave as if run sequentially.
293
+ * Highest safety, highest contention. Use for financial operations.
294
+ */
295
/**
 * Moves `amount` from one account to another inside a single SERIALIZABLE
 * transaction. Either both balances change or neither does.
 *
 * @param pool          Pool to borrow the transaction's connection from.
 * @param fromAccountId Account to debit.
 * @param toAccountId   Account to credit.
 * @param amount        Amount to move; must be a finite number greater than zero.
 * @throws ValidationError if the amount is invalid, either account does not
 *         exist, or the source balance is too low. Database errors are
 *         rethrown unchanged after the transaction is rolled back.
 */
export async function transferFunds(
  pool: Pool,
  fromAccountId: string,
  toAccountId: string,
  amount: number,
): Promise<void> {
  // A negative amount would silently reverse the direction of the transfer
  if (!Number.isFinite(amount) || amount <= 0) {
    throw new ValidationError('Transfer amount must be a positive number');
  }

  const client = await pool.connect();
  let rollbackFailed = false;

  try {
    await client.query('BEGIN ISOLATION LEVEL SERIALIZABLE');

    const from = await client.query(
      'SELECT balance FROM accounts WHERE id = $1 FOR UPDATE',
      [fromAccountId],
    );
    const source = from.rows[0];
    if (!source) {
      throw new ValidationError('Source account not found');
    }
    // Drivers commonly return NUMERIC columns as strings; compare as numbers
    if (Number(source.balance) < amount) {
      throw new ValidationError('Insufficient funds');
    }

    await client.query(
      'UPDATE accounts SET balance = balance - $1 WHERE id = $2',
      [amount, fromAccountId],
    );
    const credit = await client.query(
      'UPDATE accounts SET balance = balance + $1 WHERE id = $2',
      [amount, toAccountId],
    );
    // Without this check a debit with no matching credit would be committed
    if (credit.rowCount === 0) {
      throw new ValidationError('Destination account not found');
    }

    await client.query('COMMIT');
  } catch (error) {
    try {
      await client.query('ROLLBACK');
    } catch {
      // Keep the original error. The connection is discarded below so the
      // open transaction cannot leak back into the pool.
      rollbackFailed = true;
    }
    throw error;
  } finally {
    // NOTE(review): relies on node-postgres treating a truthy argument to
    // `release` as "destroy this client". Confirm for the pool in use.
    client.release(rollbackFailed);
  }
}
330
+ ```
331
+
332
+ ## Trade-offs
333
+
334
+ - **Migration safety vs speed:** Zero-downtime migrations require more steps but avoid service interruption.
335
+ - **Pool size:** Too small causes connection starvation under load. Too large wastes database memory and can degrade performance.
336
+ - **Read replicas:** Reduce primary load but introduce replication lag. Not every read can tolerate stale data.
337
+ - **Stronger isolation:** Prevents anomalies but increases transaction conflicts and retries.
338
+ - **Index count:** More indexes speed reads but slow writes and consume storage.
339
+
340
+ ## Common Pitfalls
341
+
342
+ 1. **Irreversible migrations** — Every migration must have a working `down()`. Test it before deploying.
343
+ 2. **Missing indexes on foreign keys** — Unindexed foreign keys cause sequential scans on JOIN and DELETE operations.
344
+ 3. **N+1 queries** — Loading a list and then querying each item individually. Use JOINs or batch queries.
345
+ 4. **Long-running transactions** — Hold locks as briefly as possible. Move non-database work outside the transaction.
346
+ 5. **No connection timeout** — Without statement and connection timeouts, a single slow query can exhaust the pool.
347
+ 6. **Ignoring EXPLAIN output** — Query plans change as data grows. Review plans periodically, not just at development time.