@garethdaine/agentops 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +10 -0
- package/LICENSE +21 -0
- package/README.md +410 -0
- package/agents/architecture-researcher.md +115 -0
- package/agents/code-critic.md +190 -0
- package/agents/delegation-router.md +40 -0
- package/agents/feature-researcher.md +117 -0
- package/agents/interrogator.md +11 -0
- package/agents/pitfalls-researcher.md +112 -0
- package/agents/plan-validator.md +173 -0
- package/agents/proposer.md +61 -0
- package/agents/security-reviewer.md +189 -0
- package/agents/skill-builder.md +43 -0
- package/agents/spec-compliance-reviewer.md +154 -0
- package/agents/stack-researcher.md +89 -0
- package/commands/build.md +766 -0
- package/commands/code-analysis.md +39 -0
- package/commands/code-field.md +22 -0
- package/commands/compliance-check.md +34 -0
- package/commands/configure.md +178 -0
- package/commands/cost-report.md +17 -0
- package/commands/enterprise/adr.md +78 -0
- package/commands/enterprise/brainstorm.md +461 -0
- package/commands/enterprise/design.md +203 -0
- package/commands/enterprise/dev-setup.md +136 -0
- package/commands/enterprise/docker-dev.md +229 -0
- package/commands/enterprise/e2e.md +233 -0
- package/commands/enterprise/feature.md +218 -0
- package/commands/enterprise/gap-analysis.md +204 -0
- package/commands/enterprise/handover.md +195 -0
- package/commands/enterprise/herd.md +152 -0
- package/commands/enterprise/knowledge.md +173 -0
- package/commands/enterprise/onboard.md +86 -0
- package/commands/enterprise/qa-check.md +80 -0
- package/commands/enterprise/reason.md +196 -0
- package/commands/enterprise/review.md +177 -0
- package/commands/enterprise/scaffold.md +153 -0
- package/commands/enterprise/status-report.md +101 -0
- package/commands/enterprise/tech-catalog.md +170 -0
- package/commands/enterprise/test-gen.md +138 -0
- package/commands/evolve.md +39 -0
- package/commands/flags.md +44 -0
- package/commands/interrogate.md +263 -0
- package/commands/lesson.md +15 -0
- package/commands/lessons.md +10 -0
- package/commands/plan.md +44 -0
- package/commands/prune.md +27 -0
- package/commands/star.md +17 -0
- package/commands/supply-chain-scan.md +44 -0
- package/commands/unicode-scan.md +63 -0
- package/commands/verify.md +41 -0
- package/commands/workflow.md +436 -0
- package/hooks/ai-guardrails.sh +114 -0
- package/hooks/audit-log.sh +26 -0
- package/hooks/auto-delegate.sh +45 -0
- package/hooks/auto-evolve.sh +22 -0
- package/hooks/auto-lesson.sh +26 -0
- package/hooks/auto-plan.sh +59 -0
- package/hooks/auto-test.sh +46 -0
- package/hooks/auto-verify.sh +30 -0
- package/hooks/budget-check.sh +24 -0
- package/hooks/code-field-preamble.sh +30 -0
- package/hooks/compliance-gate.sh +50 -0
- package/hooks/content-trust.sh +22 -0
- package/hooks/credential-redact.sh +23 -0
- package/hooks/delegation-trust.sh +15 -0
- package/hooks/detect-test-run.sh +19 -0
- package/hooks/enforcement-lib.sh +60 -0
- package/hooks/evolve-gate.sh +32 -0
- package/hooks/evolve-lib.sh +32 -0
- package/hooks/exfiltration-check.sh +67 -0
- package/hooks/failure-collector.sh +27 -0
- package/hooks/feature-flags.sh +67 -0
- package/hooks/file-provenance.sh +31 -0
- package/hooks/flag-utils.sh +36 -0
- package/hooks/hooks.json +145 -0
- package/hooks/injection-scan.sh +58 -0
- package/hooks/integrity-verify.sh +91 -0
- package/hooks/lessons-check.sh +17 -0
- package/hooks/lockfile-audit.sh +109 -0
- package/hooks/patterns-lib.sh +22 -0
- package/hooks/plan-gate.sh +18 -0
- package/hooks/redact-lib.sh +15 -0
- package/hooks/runtime-mode.sh +56 -0
- package/hooks/session-cleanup.sh +74 -0
- package/hooks/skill-validator.sh +28 -0
- package/hooks/standards-enforce.sh +106 -0
- package/hooks/star-gate.sh +93 -0
- package/hooks/star-preamble.sh +10 -0
- package/hooks/telemetry.sh +33 -0
- package/hooks/todo-prune.sh +84 -0
- package/hooks/unicode-firewall.sh +122 -0
- package/hooks/unicode-lib.sh +66 -0
- package/hooks/unicode-scan-session.sh +96 -0
- package/hooks/validate-command.sh +103 -0
- package/hooks/validate-env.sh +51 -0
- package/hooks/validate-path.sh +81 -0
- package/package.json +40 -0
- package/settings.json +6 -0
- package/templates/ai-config/tool-standards.md +56 -0
- package/templates/architecture/api-first.md +192 -0
- package/templates/architecture/auth-patterns.md +302 -0
- package/templates/architecture/caching-strategy.md +359 -0
- package/templates/architecture/database-patterns.md +347 -0
- package/templates/architecture/event-driven.md +252 -0
- package/templates/architecture/integration-patterns.md +185 -0
- package/templates/architecture/multi-tenancy.md +104 -0
- package/templates/architecture/service-boundaries.md +200 -0
- package/templates/build/brief-template.md +86 -0
- package/templates/build/summary-template.md +100 -0
- package/templates/build/task-plan-template.md +133 -0
- package/templates/communication/effort-estimate.md +54 -0
- package/templates/communication/incident-response.md +59 -0
- package/templates/communication/post-mortem.md +109 -0
- package/templates/communication/risk-register.md +43 -0
- package/templates/communication/sprint-demo-checklist.md +64 -0
- package/templates/communication/stakeholder-presentation-outline.md +84 -0
- package/templates/communication/technical-proposal.md +77 -0
- package/templates/delivery/deployment/deployment-checklist.md +49 -0
- package/templates/delivery/design/solution-design-checklist.md +37 -0
- package/templates/delivery/discovery/stakeholder-questions.md +33 -0
- package/templates/delivery/handover/knowledge-transfer-checklist.md +75 -0
- package/templates/delivery/handover/operational-runbook.md +117 -0
- package/templates/delivery/handover/support-escalation-matrix.md +56 -0
- package/templates/delivery/implementation/blocker-escalation-template.md +55 -0
- package/templates/delivery/implementation/sprint-planning-template.md +49 -0
- package/templates/delivery/implementation/task-decomposition-guide.md +59 -0
- package/templates/delivery/qa/test-plan-template.md +76 -0
- package/templates/delivery/qa/test-results-template.md +55 -0
- package/templates/delivery/qa/uat-signoff-template.md +44 -0
- package/templates/governance/codeowners.md +60 -0
- package/templates/integration/adapter-pattern.md +160 -0
- package/templates/scaffolds/env-validation.md +85 -0
- package/templates/scaffolds/error-handling.md +171 -0
- package/templates/scaffolds/graceful-shutdown.md +139 -0
- package/templates/scaffolds/health-check.md +109 -0
- package/templates/scaffolds/structured-logging.md +134 -0
- package/templates/standards/engineering-standards.md +413 -0
- package/templates/standards/standards-checklist.md +125 -0
- package/templates/tech-catalog.json +663 -0
- package/templates/utilities/project-detection.md +75 -0
- package/templates/utilities/requirements-collection.md +68 -0
- package/templates/utilities/template-rendering.md +81 -0
- package/templates/workflows/architecture-decision.md +90 -0
- package/templates/workflows/bug-investigation.md +83 -0
- package/templates/workflows/feature-implementation.md +80 -0
- package/templates/workflows/refactoring.md +83 -0
- package/templates/workflows/spike-exploration.md +82 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
# Architecture Pattern: Caching Strategy
|
|
2
|
+
|
|
3
|
+
## When to Use
|
|
4
|
+
|
|
5
|
+
- Read-heavy workloads where the same data is requested repeatedly
|
|
6
|
+
- Expensive computations or queries that can tolerate slightly stale results
|
|
7
|
+
- Reducing latency for frequently accessed resources
|
|
8
|
+
- Protecting backend services from traffic spikes
|
|
9
|
+
|
|
10
|
+
## Pattern Description
|
|
11
|
+
|
|
12
|
+
Caching stores computed results closer to the consumer to avoid redundant work. Effective caching requires deliberate decisions about what to cache, where to cache it, how long entries remain valid, and how to invalidate them when the source data changes. The hardest part of caching is not adding it — it is invalidating it correctly.
|
|
13
|
+
|
|
14
|
+
## Cache Layers
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
/**
|
|
18
|
+
* Layer 1: Application memory (fastest, per-process, lost on restart)
|
|
19
|
+
* Layer 2: Redis / Memcached (shared across processes, survives restarts)
|
|
20
|
+
* Layer 3: CDN (edge cache for static and semi-static content)
|
|
21
|
+
*
|
|
22
|
+
* Check layers in order: memory -> Redis -> origin
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/**
 * Contract implemented by every cache tier.
 *
 * `get` resolves to `null` on a miss, so a stored `null` cannot be told apart
 * from an absent key.
 */
export interface CacheLayer {
  /** Resolve the value stored under `key`, or `null` when there is none. */
  get<T>(key: string): Promise<T | null>;
  /** Store `value` under `key` with a time-to-live of `ttlSeconds`. */
  set<T>(key: string, value: T, ttlSeconds: number): Promise<void>;
  /** Remove `key` from this tier. */
  delete(key: string): Promise<void>;
}

/**
 * Two-tier cache: a per-process memory tier in front of a shared Redis tier.
 *
 * Reads check memory first and fall back to Redis; a Redis hit is copied into
 * memory. Writes and deletes go to both tiers.
 */
export class TieredCache implements CacheLayer {
  /**
   * @param memory               Fast local tier.
   * @param redis                Shared tier.
   * @param memoryTtlCapSeconds  Upper bound on the TTL used for the memory tier
   *                             on `set` (default 60, the previous fixed value).
   * @param promotionTtlSeconds  TTL used when a Redis hit is copied into memory
   *                             (default 30, the previous fixed value).
   */
  constructor(
    private memory: CacheLayer,
    private redis: CacheLayer,
    private readonly memoryTtlCapSeconds: number = 60,
    private readonly promotionTtlSeconds: number = 30,
  ) {}

  /** Look up `key` in memory, then Redis; resolves to `null` when both miss. */
  async get<T>(key: string): Promise<T | null> {
    const memResult = await this.memory.get<T>(key);
    if (memResult !== null) return memResult;

    const redisResult = await this.redis.get<T>(key);
    if (redisResult === null) return null;

    // Promote with a short TTL: the memory copy cannot see invalidations made
    // by other processes, so it must age out quickly.
    await this.memory.set(key, redisResult, this.promotionTtlSeconds);
    return redisResult;
  }

  /** Write to both tiers; the memory tier's TTL is capped. */
  async set<T>(key: string, value: T, ttlSeconds: number): Promise<void> {
    await Promise.all([
      this.memory.set(key, value, Math.min(ttlSeconds, this.memoryTtlCapSeconds)),
      this.redis.set(key, value, ttlSeconds),
    ]);
  }

  /** Remove `key` from both tiers. */
  async delete(key: string): Promise<void> {
    await Promise.all([this.memory.delete(key), this.redis.delete(key)]);
  }
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### In-Memory Cache Implementation
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
/**
 * In-process cache tier backed by a `Map`, with per-entry expiry and a bounded
 * size.
 *
 * A `Map` iterates in insertion order, so re-inserting an entry on every hit
 * keeps the least recently used key first; that key is evicted when the cache
 * grows past `maxEntries`.
 */
export class MemoryCache implements CacheLayer {
  private store = new Map<string, { value: unknown; expiresAt: number }>();

  /**
   * @param maxEntries Maximum number of entries kept before the least recently
   *                   used one is evicted (default 10 000).
   */
  constructor(private readonly maxEntries: number = 10_000) {}

  /** Resolve the live value for `key`, or `null` when absent or expired. */
  async get<T>(key: string): Promise<T | null> {
    const entry = this.store.get(key);
    if (!entry) return null;

    // `>=` so an entry is never served at or after its expiry instant
    // (a TTL of 0 therefore stores nothing readable).
    if (Date.now() >= entry.expiresAt) {
      this.store.delete(key);
      return null;
    }

    // Re-insert to mark the key as most recently used.
    this.store.delete(key);
    this.store.set(key, entry);
    return entry.value as T;
  }

  /** Store `value` for `ttlSeconds`, evicting old entries if the cache is full. */
  async set<T>(key: string, value: T, ttlSeconds: number): Promise<void> {
    this.store.delete(key);
    this.store.set(key, {
      value,
      expiresAt: Date.now() + ttlSeconds * 1000,
    });

    while (this.store.size > this.maxEntries) {
      const oldest = this.store.keys().next().value;
      if (oldest === undefined) break;
      this.store.delete(oldest);
    }
  }

  /** Remove `key` if present. */
  async delete(key: string): Promise<void> {
    this.store.delete(key);
  }
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Cache Key Design
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
/**
|
|
102
|
+
* Cache key rules:
|
|
103
|
+
* 1. Include all parameters that affect the result
|
|
104
|
+
* 2. Prefix with entity type for easy bulk invalidation
|
|
105
|
+
* 3. Include tenant ID to prevent cross-tenant data leaks
|
|
106
|
+
* 4. Keep keys human-readable for debugging
|
|
107
|
+
* 5. Use a consistent separator (colon is conventional for Redis)
|
|
108
|
+
*/
|
|
109
|
+
|
|
110
|
+
/**
 * Build a colon-separated cache key of the form
 * `entity:tenantId[:id | :list:k1=v1&k2=v2]`.
 *
 * Every caller-supplied segment is percent-encoded so that a value containing
 * `:`, `&` or `=` cannot produce the same key as a different set of inputs
 * (for example tenant `a` + id `b:c` versus tenant `a:b` + id `c`). Values made
 * only of letters, digits, `-`, `_` and `.` are left unchanged.
 *
 * @param parts.entity   Entity type prefix.
 * @param parts.tenantId Tenant the entry belongs to.
 * @param parts.id       When non-empty, yields a single-entity key and
 *                       `params` is ignored.
 * @param parts.params   Query parameters for a list key; keys are sorted so the
 *                       result does not depend on property order.
 * @returns The cache key.
 */
export function buildCacheKey(parts: {
  entity: string;
  tenantId: string;
  id?: string;
  params?: Record<string, string | number | boolean>;
}): string {
  const encode = (segment: string | number | boolean): string =>
    encodeURIComponent(String(segment));

  const base = `${encode(parts.entity)}:${encode(parts.tenantId)}`;

  if (parts.id) {
    return `${base}:${encode(parts.id)}`;
  }

  const { params } = parts;
  if (params) {
    const query = Object.keys(params)
      .sort()
      .map((name) => `${encode(name)}=${encode(params[name])}`)
      .join('&');
    return `${base}:list:${query}`;
  }

  return base;
}
|
|
130
|
+
|
|
131
|
+
// Examples:
|
|
132
|
+
// "project:tenant-abc:proj-123"
|
|
133
|
+
// "project:tenant-abc:list:page=1&status=active"
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Invalidation Strategies
|
|
137
|
+
|
|
138
|
+
### TTL-Based (Simplest)
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
/**
|
|
142
|
+
* Set a time-to-live and accept that data may be stale
|
|
143
|
+
* for up to that duration. Appropriate when eventual
|
|
144
|
+
* consistency is acceptable.
|
|
145
|
+
*/
|
|
146
|
+
await cache.set('project:tenant-1:proj-123', project, 300); // 5 minutes
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Event-Based Invalidation (Strongest Consistency)
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
/**
|
|
153
|
+
* Invalidate cache entries when the underlying data changes.
|
|
154
|
+
* Requires an event bus or change data capture pipeline.
|
|
155
|
+
*/
|
|
156
|
+
eventBus.subscribe('project.updated', async (event) => {
  // Drop the cached copy of the project that changed.
  const entityKey = buildCacheKey({
    entity: 'project',
    tenantId: event.data.tenantId,
    id: event.data.projectId,
  });
  await cache.delete(entityKey);

  // Any cached list for this tenant may contain the changed project, so clear
  // them all by pattern.
  const listPattern = `project:${event.data.tenantId}:list:*`;
  await redis.deletePattern(listPattern);
});
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Write-Through
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
/**
|
|
173
|
+
* Update the cache synchronously on every write.
|
|
174
|
+
* The cache is always consistent but writes are slower.
|
|
175
|
+
*/
|
|
176
|
+
/**
 * Repository that refreshes the cache as part of every update, so the cached
 * project is rewritten in the same call that changes the database row.
 */
export class WriteThroughRepository {
  constructor(
    private db: DatabaseClient,
    private cache: CacheLayer,
  ) {}

  /**
   * Persist `data` for project `id`, then cache the stored row for 600 seconds.
   *
   * @returns The row returned by the database update.
   */
  async update(tenantId: string, id: string, data: UpdateInput): Promise<Project> {
    const persisted = await this.db.update('projects', id, data);
    await this.cache.set(
      buildCacheKey({ entity: 'project', tenantId, id }),
      persisted,
      600,
    );
    return persisted;
  }
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Write-Behind (Write-Back)
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
/**
|
|
195
|
+
* Write to the cache immediately, persist to the database asynchronously.
|
|
196
|
+
* Higher write throughput but risk of data loss if the cache or the write queue fails
|
|
197
|
+
* before the database write completes.
|
|
198
|
+
*/
|
|
199
|
+
/**
 * Repository that acknowledges an update once it is queued and cached; the
 * database write happens later, when the queued job is processed.
 */
export class WriteBehindRepository {
  constructor(
    private db: DatabaseClient,
    private cache: CacheLayer,
    private writeQueue: Queue,
  ) {}

  /**
   * Queue the database write for project `id`, then update the cached copy.
   *
   * The job is enqueued first: if enqueueing fails the call rejects before the
   * cache is touched, so the cache never holds a change that will not be
   * persisted.
   */
  async update(tenantId: string, id: string, data: UpdateInput): Promise<void> {
    const key = buildCacheKey({ entity: 'project', tenantId, id });

    // Queue database write for async processing.
    await this.writeQueue.add('db-write', {
      table: 'projects',
      id,
      data,
    });

    // Merge onto the cached entity (when there is one) so readers do not get
    // an object containing only the fields supplied in this update.
    const existing = await this.cache.get<Record<string, unknown>>(key);
    const updated = {
      ...(existing ?? {}),
      ...data,
      id,
      updatedAt: new Date().toISOString(),
    };
    await this.cache.set(key, updated, 600);
  }
}
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## Stampede Prevention
|
|
224
|
+
|
|
225
|
+
When a popular cache entry expires, many concurrent requests can hit the database simultaneously.
|
|
226
|
+
|
|
227
|
+
### Mutex Lock
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
/**
|
|
231
|
+
* Only one request computes the value while others wait.
|
|
232
|
+
*/
|
|
233
|
+
/**
 * Read-through lookup in which only the holder of a Redis lock computes a
 * missing value; other callers poll the cache until it appears.
 *
 * Differences from a naive lock:
 * - The lock value is a per-call token and release is a compare-and-delete, so
 *   a caller whose lock already expired cannot delete a lock that another
 *   process has since acquired.
 * - Waiting is bounded by `maxWaitMs`. After that the caller computes the value
 *   itself instead of polling forever (degrading to an unprotected recompute
 *   rather than hanging).
 *
 * NOTE(review): lock release uses `redis.eval(script, { keys, arguments })`,
 * i.e. it assumes a node-redis v4-style client, matching the `set` option
 * shape already used here. Confirm against the actual `RedisClient`.
 *
 * @param cache      Cache to read from and populate.
 * @param redis      Redis client used for the lock.
 * @param key        Cache key.
 * @param computeFn  Produces the value on a miss.
 * @param ttlSeconds TTL for the cached value.
 * @param options    `lockTtlSeconds` (default 10), `retryDelayMs` (default 50),
 *                   `maxWaitMs` (default 5000).
 * @returns The cached or freshly computed value.
 */
export async function getWithMutex<T>(
  cache: CacheLayer,
  redis: RedisClient,
  key: string,
  computeFn: () => Promise<T>,
  ttlSeconds: number,
  options: { lockTtlSeconds?: number; retryDelayMs?: number; maxWaitMs?: number } = {},
): Promise<T> {
  const { lockTtlSeconds = 10, retryDelayMs = 50, maxWaitMs = 5_000 } = options;

  const releaseIfOwner =
    "if redis.call('get', KEYS[1]) == ARGV[1] then return redis.call('del', KEYS[1]) else return 0 end";
  const lockKey = `lock:${key}`;
  const token = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const deadline = Date.now() + maxWaitMs;

  for (;;) {
    const cached = await cache.get<T>(key);
    if (cached !== null) return cached;

    const acquired = await redis.set(lockKey, token, { NX: true, EX: lockTtlSeconds });

    if (acquired) {
      try {
        const value = await computeFn();
        await cache.set(key, value, ttlSeconds);
        return value;
      } finally {
        await redis.eval(releaseIfOwner, { keys: [lockKey], arguments: [token] });
      }
    }

    if (Date.now() >= deadline) {
      // Waited long enough: compute without the lock.
      const value = await computeFn();
      await cache.set(key, value, ttlSeconds);
      return value;
    }

    // Another process is computing — wait and retry.
    await sleep(retryDelayMs);
  }
}
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Probabilistic Early Expiration
|
|
263
|
+
|
|
264
|
+
```typescript
|
|
265
|
+
/**
|
|
266
|
+
* Each request has a small probability of recomputing the value
|
|
267
|
+
* before it actually expires, spreading the refresh load over time.
|
|
268
|
+
*
|
|
269
|
+
* XFetch algorithm: recompute with probability that increases
|
|
270
|
+
* as the entry approaches its expiry time.
|
|
271
|
+
*/
|
|
272
|
+
/**
 * Read-through lookup with probabilistic early refresh.
 *
 * A cached entry is refreshed early when
 * `-deltaMs * beta * ln(random) >= timeRemaining`, so the chance of a refresh
 * is negligible for a fresh entry and rises towards 1 as expiry approaches.
 * Entries that have already expired are always recomputed.
 *
 * The cache must expose `getWithMetadata`, which is not part of `CacheLayer`;
 * the parameter type states that requirement explicitly.
 *
 * @param cache      Cache that can report an entry's `expiresAt` (epoch ms).
 * @param key        Cache key.
 * @param computeFn  Produces the value on a miss or refresh.
 * @param ttlSeconds TTL for the cached value.
 * @param options    `beta` (default 1) scales how eagerly entries refresh;
 *                   `deltaMs` (default 5% of the TTL) stands in for the time a
 *                   recompute takes, since that is not recorded with the entry.
 * @returns The cached or freshly computed value.
 */
export async function getWithEarlyExpiry<T>(
  cache: CacheLayer & {
    getWithMetadata<V>(key: string): Promise<{ value: V; expiresAt: number } | null>;
  },
  key: string,
  computeFn: () => Promise<T>,
  ttlSeconds: number,
  options: { beta?: number; deltaMs?: number } = {},
): Promise<T> {
  const { beta = 1, deltaMs = ttlSeconds * 1000 * 0.05 } = options;

  const entry = await cache.getWithMetadata<T>(key);

  if (entry !== null) {
    const timeRemaining = entry.expiresAt - Date.now();

    // ln(random) is <= 0, so this is a non-negative, exponentially distributed
    // head start measured in milliseconds.
    const headStart = -deltaMs * beta * Math.log(Math.random());
    const refreshEarly = headStart >= timeRemaining;

    if (timeRemaining > 0 && !refreshEarly) {
      return entry.value;
    }
  }

  const value = await computeFn();
  await cache.set(key, value, ttlSeconds);
  return value;
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## Cache Warming
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
/**
|
|
305
|
+
* Pre-populate the cache on application startup or deployment
|
|
306
|
+
* to avoid cold-start latency for critical paths.
|
|
307
|
+
*/
|
|
308
|
+
/**
 * Pre-populate the cache with the active projects of the 100 most recently
 * active tenants.
 *
 * Projects are loaded with one batched query instead of one query per tenant.
 *
 * NOTE(review): the batched query binds a JavaScript array to `ANY($1)`; this
 * assumes a PostgreSQL driver that maps arrays to SQL arrays. Confirm against
 * the actual `DatabaseClient`.
 *
 * @param db    Database client used to load tenants and projects.
 * @param cache Cache to populate; each project is stored for 600 seconds.
 */
export async function warmCache(
  db: DatabaseClient,
  cache: CacheLayer,
): Promise<void> {
  logger.info('Warming cache...');

  // Warm the most frequently accessed entities
  const activeTenants = await db.query<{ id: string }>(
    'SELECT id FROM tenants WHERE status = $1 ORDER BY last_active_at DESC LIMIT 100',
    ['active'],
  );

  if (activeTenants.length > 0) {
    const projects = await db.query<{ id: string; tenant_id: string }>(
      'SELECT * FROM projects WHERE tenant_id = ANY($1) AND status = $2',
      [activeTenants.map((tenant) => tenant.id), 'active'],
    );

    for (const project of projects) {
      const key = buildCacheKey({
        entity: 'project',
        tenantId: project.tenant_id,
        id: project.id,
      });
      await cache.set(key, project, 600);
    }
  }

  logger.info('Cache warming complete', {
    tenants: activeTenants.length,
  });
}
|
|
340
|
+
|
|
341
|
+
// Call on startup, after migrations
|
|
342
|
+
await warmCache(db, cache);
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## Trade-offs
|
|
346
|
+
|
|
347
|
+
- **Consistency vs latency:** Longer TTLs mean faster reads but staler data. Event-based invalidation is more consistent but adds infrastructure complexity.
|
|
348
|
+
- **Memory cost:** Caching everything is wasteful. Cache hot data and let cold data go to origin.
|
|
349
|
+
- **Write-through vs write-behind:** Write-through is consistent but adds write latency. Write-behind is fast but risks data loss.
|
|
350
|
+
- **Tiered caching complexity:** Each layer adds operational surface area. Start with one layer and add more only when measurements justify it.
|
|
351
|
+
|
|
352
|
+
## Common Pitfalls
|
|
353
|
+
|
|
354
|
+
1. **No invalidation strategy** — Setting a TTL and hoping for the best leads to stale data bugs that are hard to reproduce.
|
|
355
|
+
2. **Cache keys missing a parameter** — If two different queries produce the same cache key, one overwrites the other with wrong data.
|
|
356
|
+
3. **Caching errors** — A failed database query cached for 5 minutes means 5 minutes of errors. Never cache error responses.
|
|
357
|
+
4. **Unbounded memory cache** — Without a max size and eviction policy (LRU), the process leaks memory. Set limits.
|
|
358
|
+
5. **Cross-tenant cache pollution** — Always include tenant ID in the cache key. A missing tenant prefix leaks data across tenants.
|
|
359
|
+
6. **Ignoring serialisation cost** — Serialising and deserialising large objects can negate the performance benefit of caching. Measure end-to-end latency.
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
# Architecture Pattern: Database Patterns
|
|
2
|
+
|
|
3
|
+
## When to Use
|
|
4
|
+
|
|
5
|
+
- Any application with persistent state beyond trivial key-value storage
|
|
6
|
+
- Systems that require schema evolution over time without downtime
|
|
7
|
+
- Read-heavy workloads that need query optimisation and replica routing
|
|
8
|
+
- Multi-service architectures where connection management becomes critical
|
|
9
|
+
|
|
10
|
+
## Pattern Description
|
|
11
|
+
|
|
12
|
+
Database patterns cover the full lifecycle of data management: how schemas evolve through migrations, how connections are pooled and managed, how queries are optimised, and how read/write workloads are distributed. Getting these fundamentals right prevents the most common class of production incidents.
|
|
13
|
+
|
|
14
|
+
## Migration Management
|
|
15
|
+
|
|
16
|
+
Migrations must be versioned, reversible, and safe to run against a live database.
|
|
17
|
+
|
|
18
|
+
```typescript
|
|
19
|
+
/**
|
|
20
|
+
* Migration file naming convention:
|
|
21
|
+
* YYYYMMDDHHMMSS_descriptive_name.ts
|
|
22
|
+
* 20250315120000_add_projects_table.ts
|
|
23
|
+
*
|
|
24
|
+
* Each migration has an up() and down() function.
|
|
25
|
+
* down() must perfectly reverse up().
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
// 20250315120000_add_projects_table.ts
|
|
29
|
+
/**
 * Create the `projects` table and the index backing tenant + status lookups.
 *
 * @param db Migration database client.
 */
export async function up(db: DatabaseClient): Promise<void> {
  await db.schema.createTable('projects', (table) => {
    table.uuid('id').primary().defaultTo(db.fn.uuid());
    // No standalone index on tenant_id: the composite index created below has
    // tenant_id as its leading column and already serves tenant-only lookups,
    // so a second index would only add write and storage cost.
    table.text('tenant_id').notNullable();
    table.text('name').notNullable();
    table.text('status').notNullable().defaultTo('active');
    table.timestamp('created_at').notNullable().defaultTo(db.fn.now());
    table.timestamp('updated_at').notNullable().defaultTo(db.fn.now());
  });

  // Composite index for the most common query pattern
  await db.schema.raw(
    'CREATE INDEX idx_projects_tenant_status ON projects(tenant_id, status)',
  );
}
|
|
44
|
+
|
|
45
|
+
/**
 * Revert the migration by calling `dropTableIfExists` for `projects`.
 *
 * @param db Migration database client.
 */
export async function down(db: DatabaseClient): Promise<void> {
  const tableName = 'projects';
  await db.schema.dropTableIfExists(tableName);
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Safe Migration Practices
|
|
51
|
+
|
|
52
|
+
```typescript
|
|
53
|
+
/**
|
|
54
|
+
* Rules for zero-downtime migrations:
|
|
55
|
+
*
|
|
56
|
+
* 1. NEVER rename a column in one step. Instead:
|
|
57
|
+
* Step 1: Add new column, backfill data
|
|
58
|
+
* Step 2: Deploy code that writes to both columns
|
|
59
|
+
* Step 3: Deploy code that reads from new column only
|
|
60
|
+
* Step 4: Drop old column
|
|
61
|
+
*
|
|
62
|
+
* 2. NEVER add a NOT NULL column without a default value
|
|
63
|
+
* to an existing table with data.
|
|
64
|
+
*
|
|
65
|
+
* 3. Adding an index on a large table — use CONCURRENTLY:
|
|
66
|
+
*/
|
|
67
|
+
/**
 * Add the `orders(created_at)` index without blocking writes.
 *
 * A failed `CREATE INDEX CONCURRENTLY` leaves an INVALID index behind under the
 * same name, which makes a plain retry fail. Dropping any leftover first makes
 * the migration safe to re-run.
 *
 * @param db Migration database client.
 */
export async function up(db: DatabaseClient): Promise<void> {
  await db.schema.raw('DROP INDEX CONCURRENTLY IF EXISTS idx_orders_created');

  // CREATE INDEX CONCURRENTLY does not lock the table for writes
  await db.schema.raw(
    'CREATE INDEX CONCURRENTLY idx_orders_created ON orders(created_at)',
  );
}

/**
 * Revert `up()` by dropping the index, also without blocking writes.
 *
 * @param db Migration database client.
 */
export async function down(db: DatabaseClient): Promise<void> {
  await db.schema.raw('DROP INDEX CONCURRENTLY IF EXISTS idx_orders_created');
}
|
|
73
|
+
|
|
74
|
+
// CONCURRENTLY indexes cannot run inside a transaction,
|
|
75
|
+
// so mark this migration as non-transactional if your tool supports it.
|
|
76
|
+
export const config = { transaction: false };
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Connection Pooling
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
/**
|
|
83
|
+
* Connection pool sizing formula (PostgreSQL):
|
|
84
|
+
* pool_size = (core_count * 2) + effective_spindle_count
|
|
85
|
+
*
|
|
86
|
+
* For a typical 4-core cloud instance with SSDs: pool_size ~= 10
|
|
87
|
+
* Each Node.js process gets its own pool, so divide the total budget by the number of processes and set `max` to the result. With 4 processes: 10 / 4 = 2-3 connections per process.
|
|
88
|
+
*/
|
|
89
|
+
/**
 * Create a connection pool for `config`.
 *
 * The limits are passed in rather than fixed, because every process creates its
 * own pool: a deployment running several processes needs to give each one only
 * its share of the database's connection budget. Omitting `sizing` keeps the
 * previous fixed values.
 *
 * @param config Connection settings (host, port, database, user, password).
 * @param sizing Optional overrides:
 *               `min` (default 2), `max` (default 10),
 *               `idleTimeoutMillis` (default 30 000),
 *               `connectionTimeoutMillis` (default 5 000),
 *               `statementTimeoutMillis` (default 30 000).
 * @returns The configured pool.
 */
export function createPool(
  config: DatabaseConfig,
  sizing: {
    min?: number;
    max?: number;
    idleTimeoutMillis?: number;
    connectionTimeoutMillis?: number;
    statementTimeoutMillis?: number;
  } = {},
): Pool {
  const {
    min = 2,
    max = 10,
    idleTimeoutMillis = 30_000,
    connectionTimeoutMillis = 5_000,
    statementTimeoutMillis = 30_000,
  } = sizing;

  return new Pool({
    host: config.host,
    port: config.port,
    database: config.database,
    user: config.user,
    password: config.password,

    // Pool sizing
    min, // Minimum idle connections
    max, // Maximum connections per process
    idleTimeoutMillis, // Close idle connections after this long
    connectionTimeoutMillis, // Fail fast if no connection available

    // Statement timeout to prevent runaway queries
    statement_timeout: statementTimeoutMillis,
  });
}
|
|
107
|
+
|
|
108
|
+
// Health check: verify pool is functional
|
|
109
|
+
/**
 * Probe the pool with `SELECT 1 AS ok`.
 *
 * @param pool Pool to probe.
 * @returns `true` when the first returned row has `ok === 1`; `false` when it
 *          does not or when anything in the probe throws.
 */
export async function checkDatabaseHealth(pool: Pool): Promise<boolean> {
  let healthy = false;
  try {
    const { rows } = await pool.query('SELECT 1 AS ok');
    healthy = rows[0]?.ok === 1;
  } catch {
    healthy = false;
  }
  return healthy;
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Query Optimisation Checklist
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
/**
|
|
123
|
+
* Before deploying a new query to production:
|
|
124
|
+
*
|
|
125
|
+
* 1. Run EXPLAIN ANALYZE on a representative dataset
|
|
126
|
+
* 2. Check for sequential scans on large tables (Seq Scan)
|
|
127
|
+
* 3. Verify index usage (Index Scan or Index Only Scan)
|
|
128
|
+
* 4. Check estimated vs actual row counts — large discrepancies
|
|
129
|
+
* mean stale statistics (run ANALYZE)
|
|
130
|
+
* 5. Look for nested loops with high row counts
|
|
131
|
+
* 6. Check for implicit type casts that prevent index usage
|
|
132
|
+
*/
|
|
133
|
+
|
|
134
|
+
// Example: detecting slow queries at runtime
|
|
135
|
+
/**
 * Run a query and log a warning when it takes longer than the threshold.
 *
 * The duration is measured in a `finally` block, so queries that reject (for
 * example because they hit a statement timeout) are reported as well.
 *
 * @param pool            Pool to run the query on.
 * @param sql             SQL text; included in the slow-query log entry.
 * @param params          Bound parameters; not logged.
 * @param label           Name identifying the query in the log entry.
 * @param slowThresholdMs Duration in milliseconds above which the query is
 *                        logged (default 100).
 * @returns The result rows.
 */
export async function queryWithTiming<T>(
  pool: Pool,
  sql: string,
  params: unknown[],
  label: string,
  slowThresholdMs: number = 100,
): Promise<T[]> {
  const start = performance.now();
  try {
    const result = await pool.query(sql, params);
    return result.rows as T[];
  } finally {
    const duration = performance.now() - start;
    if (duration > slowThresholdMs) {
      logger.warn('Slow query detected', { label, duration, sql });
    }
  }
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Indexing Strategy
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
/**
|
|
156
|
+
* Index type selection guide (PostgreSQL):
|
|
157
|
+
*
|
|
158
|
+
* B-tree (default):
|
|
159
|
+
* - Equality and range queries (=, <, >, BETWEEN)
|
|
160
|
+
* - ORDER BY, GROUP BY
|
|
161
|
+
* - Most common choice
|
|
162
|
+
*
|
|
163
|
+
* GIN (Generalized Inverted Index):
|
|
164
|
+
* - JSONB containment (@>, ?)
|
|
165
|
+
* - Full-text search (tsvector)
|
|
166
|
+
* - Array containment (@>, &&)
|
|
167
|
+
*
|
|
168
|
+
* GiST (Generalized Search Tree):
|
|
169
|
+
* - Geometric/spatial data
|
|
170
|
+
* - Range types (overlaps, contains)
|
|
171
|
+
* - Nearest-neighbour searches
|
|
172
|
+
*
|
|
173
|
+
* BRIN (Block Range Index):
|
|
174
|
+
* - Very large tables where values correlate with physical order
|
|
175
|
+
* - Time-series data (created_at on append-only tables)
|
|
176
|
+
* - Tiny index size compared to B-tree
|
|
177
|
+
*/
|
|
178
|
+
|
|
179
|
+
// Practical index examples
|
|
180
|
+
/**
 * Create the three example indexes, one statement at a time and in order.
 *
 * @param db Migration database client.
 */
export async function up(db: DatabaseClient): Promise<void> {
  const statements = [
    // B-tree, restricted by a WHERE clause to rows whose status is not 'completed'
    "CREATE INDEX idx_orders_status ON orders(status) WHERE status != 'completed'",
    // GIN: querying JSONB metadata
    'CREATE INDEX idx_projects_metadata ON projects USING gin(metadata jsonb_path_ops)',
    // Composite index: column order matters — put equality columns first
    'CREATE INDEX idx_events_tenant_time ON events(tenant_id, created_at DESC)',
  ];

  for (const statement of statements) {
    await db.schema.raw(statement);
  }
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Read Replica Routing
|
|
199
|
+
|
|
200
|
+
```typescript
|
|
201
|
+
/**
|
|
202
|
+
* Route read-only queries to replicas, writes to the primary.
|
|
203
|
+
* Beware of replication lag — recently written data may not be
|
|
204
|
+
* available on replicas immediately.
|
|
205
|
+
*/
|
|
206
|
+
/**
 * Sends writes to the primary pool and spreads reads across replica pools in
 * round-robin order.
 */
export class RoutingDatabase {
  constructor(
    private primary: Pool,
    private replicas: Pool[],
  ) {}

  private nextReplicaIndex = 0;

  /** Route to primary for writes */
  async write<T>(sql: string, params: unknown[]): Promise<T[]> {
    const result = await this.primary.query(sql, params);
    return result.rows as T[];
  }

  /**
   * Round-robin across read replicas.
   *
   * With no replicas configured the read is served by the primary instead of
   * failing on an undefined pool.
   */
  async read<T>(sql: string, params: unknown[]): Promise<T[]> {
    const replicaCount = this.replicas.length;
    if (replicaCount === 0) {
      return this.readFromPrimary<T>(sql, params);
    }

    const index = this.nextReplicaIndex % replicaCount;
    // Keep the counter inside [0, replicaCount) so it never grows without bound.
    this.nextReplicaIndex = (index + 1) % replicaCount;

    const result = await this.replicas[index].query(sql, params);
    return result.rows as T[];
  }

  /**
   * Read from primary when you need to read-your-own-writes.
   * Use sparingly — this adds load to the primary.
   */
  async readFromPrimary<T>(sql: string, params: unknown[]): Promise<T[]> {
    const result = await this.primary.query(sql, params);
    return result.rows as T[];
  }
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Advisory Locks
|
|
240
|
+
|
|
241
|
+
```typescript
|
|
242
|
+
/**
|
|
243
|
+
* Advisory locks for application-level mutual exclusion.
|
|
244
|
+
* Use when you need to prevent concurrent execution of a
|
|
245
|
+
* critical section across multiple processes.
|
|
246
|
+
*/
|
|
247
|
+
/**
 * Run `fn` while holding a session-level PostgreSQL advisory lock.
 *
 * The lock attempt is non-blocking: if another session holds the lock the call
 * rejects with `ConflictError` and `fn` is not run.
 *
 * Cleanup guarantees:
 * - `pg_advisory_unlock` is issued only when the lock was actually acquired.
 * - The client is always returned to the pool. If unlocking fails, the client
 *   is released with `true` so the pool discards the connection; closing the
 *   session is what frees a session-level lock. The unlock failure does not
 *   replace the result or error of `fn`.
 *
 * @param pool   Pool to take the dedicated connection from.
 * @param lockId Advisory lock key.
 * @param fn     Critical section.
 * @returns Whatever `fn` resolves to.
 * @throws ConflictError when the lock is held elsewhere.
 */
export async function withAdvisoryLock<T>(
  pool: Pool,
  lockId: number,
  fn: () => Promise<T>,
): Promise<T> {
  const client = await pool.connect();
  let acquired = false;

  try {
    // pg_try_advisory_lock returns immediately (non-blocking)
    const lockResult = await client.query(
      'SELECT pg_try_advisory_lock($1) AS acquired',
      [lockId],
    );
    acquired = Boolean(lockResult.rows[0]?.acquired);

    if (!acquired) {
      throw new ConflictError('Could not acquire lock — another process holds it');
    }

    return await fn();
  } finally {
    let unlockFailed = false;
    if (acquired) {
      try {
        await client.query('SELECT pg_advisory_unlock($1)', [lockId]);
      } catch {
        unlockFailed = true;
      }
    }

    if (unlockFailed) {
      client.release(true);
    } else {
      client.release();
    }
  }
}
|
|
271
|
+
|
|
272
|
+
// Usage: prevent concurrent migration runs
|
|
273
|
+
await withAdvisoryLock(pool, 1001, async () => {
|
|
274
|
+
await runPendingMigrations();
|
|
275
|
+
});
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Transaction Isolation Levels
|
|
279
|
+
|
|
280
|
+
```typescript
|
|
281
|
+
/**
|
|
282
|
+
* Isolation levels (from least to most strict):
|
|
283
|
+
*
|
|
284
|
+
* READ COMMITTED (PostgreSQL default):
|
|
285
|
+
* Each statement sees only committed data. Good for most workloads.
|
|
286
|
+
*
|
|
287
|
+
* REPEATABLE READ:
|
|
288
|
+
* The transaction sees a snapshot from its start. Use for reports
|
|
289
|
+
* or calculations that read the same data multiple times.
|
|
290
|
+
*
|
|
291
|
+
* SERIALIZABLE:
|
|
292
|
+
* Full isolation — transactions behave as if run sequentially.
|
|
293
|
+
* Highest safety, highest contention. Use for financial operations.
|
|
294
|
+
*/
|
|
295
|
+
/**
 * Move `amount` from one account to another inside a SERIALIZABLE transaction.
 *
 * The transfer is rejected, and nothing is committed, when:
 * - `amount` is not a finite number greater than zero,
 * - the source account does not exist,
 * - the source balance is lower than `amount`,
 * - the destination account does not exist (the credit updates no rows).
 *
 * NOTE(review): a SERIALIZABLE transaction can be aborted by the database with
 * a serialization failure under contention. This function rethrows it; retry
 * is left to the caller.
 *
 * @param pool          Pool to take the transaction's connection from.
 * @param fromAccountId Account to debit.
 * @param toAccountId   Account to credit.
 * @param amount        Amount to move; must be positive.
 * @throws ValidationError for any of the rejections listed above.
 */
export async function transferFunds(
  pool: Pool,
  fromAccountId: string,
  toAccountId: string,
  amount: number,
): Promise<void> {
  // A negative amount would otherwise move money in the opposite direction
  // while passing the balance check.
  if (!Number.isFinite(amount) || amount <= 0) {
    throw new ValidationError('Transfer amount must be a positive number');
  }

  const client = await pool.connect();
  try {
    await client.query('BEGIN ISOLATION LEVEL SERIALIZABLE');

    const from = await client.query(
      'SELECT balance FROM accounts WHERE id = $1 FOR UPDATE',
      [fromAccountId],
    );
    const source = from.rows[0];
    if (!source) {
      throw new ValidationError('Source account not found');
    }
    if (Number(source.balance) < amount) {
      throw new ValidationError('Insufficient funds');
    }

    await client.query(
      'UPDATE accounts SET balance = balance - $1 WHERE id = $2',
      [amount, fromAccountId],
    );
    const credit = await client.query(
      'UPDATE accounts SET balance = balance + $1 WHERE id = $2',
      [amount, toAccountId],
    );
    // Without this check a debit with no matching credit would be committed.
    if (credit.rowCount === 0) {
      throw new ValidationError('Destination account not found');
    }

    await client.query('COMMIT');
  } catch (error) {
    try {
      await client.query('ROLLBACK');
    } catch {
      // Keep the original failure; a failed ROLLBACK must not replace it.
    }
    throw error;
  } finally {
    client.release();
  }
}
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
## Trade-offs
|
|
333
|
+
|
|
334
|
+
- **Migration safety vs speed:** Zero-downtime migrations require more steps but avoid service interruption.
|
|
335
|
+
- **Pool size:** Too small causes connection starvation under load. Too large wastes database memory and can degrade performance.
|
|
336
|
+
- **Read replicas:** Reduce primary load but introduce replication lag. Not every read can tolerate stale data.
|
|
337
|
+
- **Stronger isolation:** Prevents anomalies but increases transaction conflicts and retries.
|
|
338
|
+
- **Index count:** More indexes speed reads but slow writes and consume storage.
|
|
339
|
+
|
|
340
|
+
## Common Pitfalls
|
|
341
|
+
|
|
342
|
+
1. **Irreversible migrations** — Every migration must have a working `down()`. Test it before deploying.
|
|
343
|
+
2. **Missing indexes on foreign keys** — Unindexed foreign keys cause sequential scans on JOIN and DELETE operations.
|
|
344
|
+
3. **N+1 queries** — Loading a list and then querying each item individually. Use JOINs or batch queries.
|
|
345
|
+
4. **Long-running transactions** — Hold locks as briefly as possible. Move non-database work outside the transaction.
|
|
346
|
+
5. **No connection timeout** — Without statement and connection timeouts, a single slow query can exhaust the pool.
|
|
347
|
+
6. **Ignoring EXPLAIN output** — Query plans change as data grows. Review plans periodically, not just at development time.
|