nttp 1.4.11 → 1.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -19
- package/dist/cli/docs.d.ts.map +1 -1
- package/dist/cli/docs.js +8 -0
- package/dist/cli/docs.js.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/executor.d.ts +9 -0
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +118 -4
- package/dist/executor.js.map +1 -1
- package/docs/README.md +31 -0
- package/docs/api.md +571 -0
- package/docs/caching.md +579 -0
- package/docs/configuration.md +763 -0
- package/docs/examples.md +615 -0
- package/docs/models.md +423 -0
- package/docs/production.md +681 -0
- package/docs/troubleshooting.md +694 -0
- package/package.json +2 -1
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
# Production Deployment Guide
|
|
2
|
+
|
|
3
|
+
Best practices for deploying NTTP in production environments.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Quick Checklist](#quick-checklist)
|
|
8
|
+
- [Infrastructure Setup](#infrastructure-setup)
|
|
9
|
+
- [Security](#security)
|
|
10
|
+
- [Performance Optimization](#performance-optimization)
|
|
11
|
+
- [Monitoring](#monitoring)
|
|
12
|
+
- [Error Handling](#error-handling)
|
|
13
|
+
- [Scaling](#scaling)
|
|
14
|
+
- [Cost Optimization](#cost-optimization)
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Quick Checklist
|
|
19
|
+
|
|
20
|
+
Before going to production:
|
|
21
|
+
|
|
22
|
+
- [ ] **Redis configured** for L1 cache persistence
|
|
23
|
+
- [ ] **Environment variables** secured (not in code)
|
|
24
|
+
- [ ] **Database connection pooling** enabled
|
|
25
|
+
- [ ] **Error logging** configured
|
|
26
|
+
- [ ] **Rate limiting** implemented
|
|
27
|
+
- [ ] **L2 semantic cache** enabled (if high query variation)
|
|
28
|
+
- [ ] **Cache monitoring** set up
|
|
29
|
+
- [ ] **Backup LLM provider** configured (optional)
|
|
30
|
+
- [ ] **Query length limits** enforced
|
|
31
|
+
- [ ] **Result size limits** configured
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Infrastructure Setup
|
|
36
|
+
|
|
37
|
+
### Recommended Architecture
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
┌─────────────┐
|
|
41
|
+
│ Client │
|
|
42
|
+
└──────┬──────┘
|
|
43
|
+
│
|
|
44
|
+
▼
|
|
45
|
+
┌─────────────────────────────────────────┐
|
|
46
|
+
│ Application Server(s) │
|
|
47
|
+
│ ┌─────────────────────────────────┐ │
|
|
48
|
+
│ │ NTTP Instance │ │
|
|
49
|
+
│ └─────────────────────────────────┘ │
|
|
50
|
+
└──┬──────────┬──────────┬───────────────┘
|
|
51
|
+
│ │ │
|
|
52
|
+
▼ ▼ ▼
|
|
53
|
+
┌──────┐ ┌──────┐ ┌───────────┐
|
|
54
|
+
│ Redis│ │ DB │ │ LLM API │
|
|
55
|
+
│Cache │ │ │ │(Anthropic)│
|
|
56
|
+
└──────┘ └──────┘ └───────────┘
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
### Required Services
|
|
62
|
+
|
|
63
|
+
**1. Redis (Required for production)**
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Docker
|
|
67
|
+
docker run -d --name redis -p 6379:6379 redis:latest
|
|
68
|
+
|
|
69
|
+
# Or use managed service
|
|
70
|
+
# - AWS ElastiCache
|
|
71
|
+
# - Redis Cloud
|
|
72
|
+
# - Upstash
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**2. Database**
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Use connection pooling
|
|
79
|
+
DATABASE_URL=postgresql://user:pass@localhost:5432/db?pool_min=2&pool_max=10
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
### Configuration
|
|
85
|
+
|
|
86
|
+
**Production `.env`:**
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Database
|
|
90
|
+
DATABASE_TYPE=pg
|
|
91
|
+
DATABASE_URL=postgresql://user:pass@prod-db.example.com:5432/mydb
|
|
92
|
+
|
|
93
|
+
# LLM
|
|
94
|
+
LLM_PROVIDER=anthropic
|
|
95
|
+
LLM_MODEL=claude-sonnet-4-5-20250929
|
|
96
|
+
ANTHROPIC_API_KEY=sk-ant-production-key-here
|
|
97
|
+
|
|
98
|
+
# Cache (REQUIRED for production)
|
|
99
|
+
REDIS_URL=redis://:password@prod-redis.example.com:6379
|
|
100
|
+
OPENAI_API_KEY=sk-openai-key-for-l2-cache
|
|
101
|
+
|
|
102
|
+
# Limits
|
|
103
|
+
MAX_QUERY_LENGTH=300
|
|
104
|
+
DEFAULT_LIMIT=50
|
|
105
|
+
MAX_LIMIT=500
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
### Docker Deployment
|
|
111
|
+
|
|
112
|
+
**Dockerfile:**
|
|
113
|
+
|
|
114
|
+
```dockerfile
|
|
115
|
+
FROM node:20-alpine
|
|
116
|
+
|
|
117
|
+
WORKDIR /app
|
|
118
|
+
|
|
119
|
+
# Copy package files
|
|
120
|
+
COPY package*.json ./
|
|
121
|
+
|
|
122
|
+
# Install dependencies
|
|
123
|
+
RUN npm ci --only=production
|
|
124
|
+
|
|
125
|
+
# Copy application
|
|
126
|
+
COPY . .
|
|
127
|
+
|
|
128
|
+
# Build if using TypeScript
|
|
129
|
+
RUN npm run build
|
|
130
|
+
|
|
131
|
+
# Start application
|
|
132
|
+
CMD ["node", "dist/server.js"]
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**docker-compose.yml:**
|
|
136
|
+
|
|
137
|
+
```yaml
|
|
138
|
+
version: '3.8'
|
|
139
|
+
|
|
140
|
+
services:
|
|
141
|
+
app:
|
|
142
|
+
build: .
|
|
143
|
+
ports:
|
|
144
|
+
- "3000:3000"
|
|
145
|
+
environment:
|
|
146
|
+
- DATABASE_URL=postgresql://user:pass@db:5432/mydb
|
|
147
|
+
- REDIS_URL=redis://redis:6379
|
|
148
|
+
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
|
149
|
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
|
150
|
+
depends_on:
|
|
151
|
+
- db
|
|
152
|
+
- redis
|
|
153
|
+
|
|
154
|
+
db:
|
|
155
|
+
image: postgres:15-alpine
|
|
156
|
+
environment:
|
|
157
|
+
- POSTGRES_PASSWORD=password
|
|
158
|
+
- POSTGRES_DB=mydb
|
|
159
|
+
volumes:
|
|
160
|
+
- pgdata:/var/lib/postgresql/data
|
|
161
|
+
|
|
162
|
+
redis:
|
|
163
|
+
image: redis:7-alpine
|
|
164
|
+
volumes:
|
|
165
|
+
- redisdata:/data
|
|
166
|
+
|
|
167
|
+
volumes:
|
|
168
|
+
pgdata:
|
|
169
|
+
redisdata:
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Security
|
|
175
|
+
|
|
176
|
+
### Environment Variables
|
|
177
|
+
|
|
178
|
+
**❌ Never do this:**
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
// Hardcoded keys - DON'T DO THIS
|
|
182
|
+
const nttp = new NTTP({
|
|
183
|
+
llm: {
|
|
184
|
+
apiKey: 'sk-ant-1234567890' // ❌ WRONG
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**✅ Always do this:**
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
// Use environment variables
|
|
193
|
+
const nttp = new NTTP({
|
|
194
|
+
llm: {
|
|
195
|
+
apiKey: process.env.ANTHROPIC_API_KEY // ✅ CORRECT
|
|
196
|
+
}
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
// Or better, use fromEnv()
|
|
200
|
+
const nttp = await NTTP.fromEnv(); // ✅ BEST
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
### Input Validation
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
import { z } from 'zod';
|
|
209
|
+
|
|
210
|
+
const querySchema = z.object({
|
|
211
|
+
query: z.string()
|
|
212
|
+
.min(1, 'Query cannot be empty')
|
|
213
|
+
.max(300, 'Query too long')
|
|
214
|
+
.regex(/^[a-zA-Z0-9\s,?!.]+$/, 'Invalid characters')
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
app.post('/api/query', async (req, res) => {
|
|
218
|
+
try {
|
|
219
|
+
// Validate input
|
|
220
|
+
const { query } = querySchema.parse(req.body);
|
|
221
|
+
|
|
222
|
+
const result = await nttp.query(query);
|
|
223
|
+
res.json({ data: result.data });
|
|
224
|
+
} catch (error) {
|
|
225
|
+
if (error instanceof z.ZodError) {
|
|
226
|
+
return res.status(400).json({ error: error.errors });
|
|
227
|
+
}
|
|
228
|
+
res.status(500).json({ error: 'Internal error' });
|
|
229
|
+
}
|
|
230
|
+
});
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
### Rate Limiting
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
import rateLimit from 'express-rate-limit';
|
|
239
|
+
|
|
240
|
+
// Per-IP rate limiting
|
|
241
|
+
const limiter = rateLimit({
|
|
242
|
+
windowMs: 15 * 60 * 1000, // 15 minutes
|
|
243
|
+
max: 100, // 100 requests per window
|
|
244
|
+
message: 'Too many requests',
|
|
245
|
+
standardHeaders: true,
|
|
246
|
+
legacyHeaders: false
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
app.use('/api/query', limiter);
|
|
250
|
+
|
|
251
|
+
// Per-user rate limiting (if authenticated)
|
|
252
|
+
const userLimiter = rateLimit({
|
|
253
|
+
windowMs: 60 * 60 * 1000, // 1 hour
|
|
254
|
+
max: 500,
|
|
255
|
+
keyGenerator: (req) => req.user?.id || req.ip
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
app.use('/api/query', userLimiter);
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
### Database Access Control
|
|
264
|
+
|
|
265
|
+
```sql
|
|
266
|
+
-- Create read-only user for NTTP
|
|
267
|
+
CREATE ROLE nttp_readonly;
|
|
268
|
+
GRANT CONNECT ON DATABASE mydb TO nttp_readonly;
|
|
269
|
+
GRANT USAGE ON SCHEMA public TO nttp_readonly;
|
|
270
|
+
GRANT SELECT ON ALL TABLES IN SCHEMA public TO nttp_readonly;
|
|
271
|
+
|
|
272
|
+
-- Create specific user
|
|
273
|
+
CREATE USER nttp_app WITH PASSWORD 'secure_password';
|
|
274
|
+
GRANT nttp_readonly TO nttp_app;
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Then use this user in your connection:
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
DATABASE_URL=postgresql://nttp_app:secure_password@localhost:5432/mydb
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## Performance Optimization
|
|
286
|
+
|
|
287
|
+
### Cache Pre-warming
|
|
288
|
+
|
|
289
|
+
Warm the cache on startup by running your most common queries:
|
|
290
|
+
|
|
291
|
+
```typescript
|
|
292
|
+
async function warmCache(nttp: NTTP) {
|
|
293
|
+
const commonQueries = [
|
|
294
|
+
"show active users",
|
|
295
|
+
"count pending orders",
|
|
296
|
+
"top 10 products by revenue",
|
|
297
|
+
"recent orders from last 7 days"
|
|
298
|
+
];
|
|
299
|
+
|
|
300
|
+
console.log('Warming cache...');
|
|
301
|
+
|
|
302
|
+
for (const query of commonQueries) {
|
|
303
|
+
try {
|
|
304
|
+
await nttp.query(query);
|
|
305
|
+
console.log(`✓ Cached: ${query}`);
|
|
306
|
+
} catch (error) {
|
|
307
|
+
console.error(`✗ Failed: ${query}`, error.message);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
console.log('Cache warming complete');
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
// On startup
|
|
315
|
+
const nttp = await NTTP.fromEnv();
|
|
316
|
+
await warmCache(nttp);
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
### Connection Pooling
|
|
322
|
+
|
|
323
|
+
```typescript
|
|
324
|
+
database: {
|
|
325
|
+
client: 'pg',
|
|
326
|
+
connection: {
|
|
327
|
+
host: process.env.DB_HOST,
|
|
328
|
+
database: process.env.DB_NAME,
|
|
329
|
+
user: process.env.DB_USER,
|
|
330
|
+
password: process.env.DB_PASSWORD,
|
|
331
|
+
// Connection pooling
|
|
332
|
+
pool: {
|
|
333
|
+
min: 2,
|
|
334
|
+
max: 10,
|
|
335
|
+
acquireTimeoutMillis: 30000,
|
|
336
|
+
idleTimeoutMillis: 30000
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
---
|
|
343
|
+
|
|
344
|
+
### Result Size Limits
|
|
345
|
+
|
|
346
|
+
```typescript
|
|
347
|
+
limits: {
|
|
348
|
+
maxQueryLength: 300, // Prevent very long queries
|
|
349
|
+
defaultLimit: 50, // Reasonable default
|
|
350
|
+
maxLimit: 500 // Prevent huge result sets
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
### Timeouts
|
|
357
|
+
|
|
358
|
+
```typescript
|
|
359
|
+
// Add timeout to queries
|
|
360
|
+
async function queryWithTimeout(query: string, timeoutMs = 10000) {
|
|
361
|
+
const timeoutPromise = new Promise((_, reject) =>
|
|
362
|
+
setTimeout(() => reject(new Error('Query timeout')), timeoutMs)
|
|
363
|
+
);
|
|
364
|
+
|
|
365
|
+
return Promise.race([
|
|
366
|
+
nttp.query(query),
|
|
367
|
+
timeoutPromise
|
|
368
|
+
]);
|
|
369
|
+
}
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
374
|
+
## Monitoring
|
|
375
|
+
|
|
376
|
+
### Cache Metrics
|
|
377
|
+
|
|
378
|
+
```typescript
|
|
379
|
+
async function logCacheMetrics() {
|
|
380
|
+
const stats = await nttp.getCacheStats();
|
|
381
|
+
|
|
382
|
+
console.log('Cache Stats:', {
|
|
383
|
+
totalSchemas: stats.totalSchemas,
|
|
384
|
+
pinnedSchemas: stats.pinnedSchemas,
|
|
385
|
+
avgUseCount: stats.averageUseCount
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
// Log to monitoring service
|
|
389
|
+
metrics.gauge('nttp.cache.total_schemas', stats.totalSchemas);
|
|
390
|
+
metrics.gauge('nttp.cache.avg_use_count', stats.averageUseCount);
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
// Run periodically
|
|
394
|
+
setInterval(logCacheMetrics, 60000); // Every minute
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
### Query Performance
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
async function monitoredQuery(query: string) {
|
|
403
|
+
const startTime = Date.now();
|
|
404
|
+
|
|
405
|
+
try {
|
|
406
|
+
const result = await nttp.query(query);
|
|
407
|
+
const duration = Date.now() - startTime;
|
|
408
|
+
|
|
409
|
+
// Log metrics
|
|
410
|
+
metrics.histogram('nttp.query.duration', duration, {
|
|
411
|
+
cacheHit: result.cacheHit,
|
|
412
|
+
cacheLayer: result.meta?.cacheLayer
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
metrics.counter('nttp.query.success', 1, {
|
|
416
|
+
cacheLayer: result.meta?.cacheLayer
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
return result;
|
|
420
|
+
} catch (error) {
|
|
421
|
+
const duration = Date.now() - startTime;
|
|
422
|
+
|
|
423
|
+
metrics.counter('nttp.query.error', 1, {
|
|
424
|
+
errorType: error.constructor.name
|
|
425
|
+
});
|
|
426
|
+
|
|
427
|
+
throw error;
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
---
|
|
433
|
+
|
|
434
|
+
### Error Tracking
|
|
435
|
+
|
|
436
|
+
```typescript
|
|
437
|
+
import * as Sentry from '@sentry/node';
|
|
438
|
+
|
|
439
|
+
try {
|
|
440
|
+
const result = await nttp.query(query);
|
|
441
|
+
} catch (error) {
|
|
442
|
+
// Log to Sentry with context
|
|
443
|
+
Sentry.captureException(error, {
|
|
444
|
+
tags: {
|
|
445
|
+
component: 'nttp',
|
|
446
|
+
errorType: error.constructor.name
|
|
447
|
+
},
|
|
448
|
+
extra: {
|
|
449
|
+
query,
|
|
450
|
+
suggestions: error.suggestions
|
|
451
|
+
}
|
|
452
|
+
});
|
|
453
|
+
|
|
454
|
+
throw error;
|
|
455
|
+
}
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
### Health Checks
|
|
461
|
+
|
|
462
|
+
```typescript
|
|
463
|
+
app.get('/health', async (req, res) => {
|
|
464
|
+
try {
|
|
465
|
+
// Check database
|
|
466
|
+
const tables = await nttp.getTables();
|
|
467
|
+
|
|
468
|
+
// Check Redis (if using)
|
|
469
|
+
// await redis.ping();
|
|
470
|
+
|
|
471
|
+
// Check LLM (optional - may be slow)
|
|
472
|
+
// const test = await nttp.explain("test query");
|
|
473
|
+
|
|
474
|
+
res.json({
|
|
475
|
+
status: 'healthy',
|
|
476
|
+
checks: {
|
|
477
|
+
database: 'ok',
|
|
478
|
+
cache: 'ok',
|
|
479
|
+
llm: 'ok'
|
|
480
|
+
}
|
|
481
|
+
});
|
|
482
|
+
} catch (error) {
|
|
483
|
+
res.status(503).json({
|
|
484
|
+
status: 'unhealthy',
|
|
485
|
+
error: error.message
|
|
486
|
+
});
|
|
487
|
+
}
|
|
488
|
+
});
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
---
|
|
492
|
+
|
|
493
|
+
## Error Handling
|
|
494
|
+
|
|
495
|
+
### Graceful Degradation
|
|
496
|
+
|
|
497
|
+
```typescript
|
|
498
|
+
async function resilientQuery(query: string) {
|
|
499
|
+
try {
|
|
500
|
+
// Try primary LLM
|
|
501
|
+
return await nttp.query(query);
|
|
502
|
+
} catch (error) {
|
|
503
|
+
if (error instanceof LLMError) {
|
|
504
|
+
// LLM failed - try backup provider
|
|
505
|
+
console.error('Primary LLM failed, trying backup...');
|
|
506
|
+
|
|
507
|
+
// Could switch to backup NTTP instance with different provider
|
|
508
|
+
return await backupNTTP.query(query);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
throw error;
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
---
|
|
517
|
+
|
|
518
|
+
### Retry Logic
|
|
519
|
+
|
|
520
|
+
```typescript
|
|
521
|
+
async function queryWithRetry(
|
|
522
|
+
query: string,
|
|
523
|
+
maxRetries = 3,
|
|
524
|
+
backoff = 1000
|
|
525
|
+
) {
|
|
526
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
527
|
+
try {
|
|
528
|
+
return await nttp.query(query);
|
|
529
|
+
} catch (error) {
|
|
530
|
+
if (attempt === maxRetries) throw error;
|
|
531
|
+
|
|
532
|
+
// Exponential backoff
|
|
533
|
+
const delay = backoff * Math.pow(2, attempt - 1);
|
|
534
|
+
console.log(`Retry ${attempt}/${maxRetries} after ${delay}ms...`);
|
|
535
|
+
await new Promise(resolve => setTimeout(resolve, delay));
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
---
|
|
542
|
+
|
|
543
|
+
## Scaling
|
|
544
|
+
|
|
545
|
+
### Horizontal Scaling
|
|
546
|
+
|
|
547
|
+
**Multiple app instances with shared cache:**
|
|
548
|
+
|
|
549
|
+
```
|
|
550
|
+
┌────────────┐
|
|
551
|
+
│ Instance 1 │───┐
|
|
552
|
+
└────────────┘ │
|
|
553
|
+
├──→ Redis ──→ Database
|
|
554
|
+
┌────────────┐ │
|
|
555
|
+
│ Instance 2 │───┘
|
|
556
|
+
└────────────┘
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
All instances share the same Redis cache, so L1 hits work across instances.
|
|
560
|
+
|
|
561
|
+
---
|
|
562
|
+
|
|
563
|
+
### Load Balancing
|
|
564
|
+
|
|
565
|
+
```nginx
|
|
566
|
+
# nginx.conf
|
|
567
|
+
upstream nttp_backend {
|
|
568
|
+
least_conn; # Route to least busy server
|
|
569
|
+
server app1:3000;
|
|
570
|
+
server app2:3000;
|
|
571
|
+
server app3:3000;
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
server {
|
|
575
|
+
listen 80;
|
|
576
|
+
|
|
577
|
+
location /api/query {
|
|
578
|
+
proxy_pass http://nttp_backend;
|
|
579
|
+
proxy_set_header Host $host;
|
|
580
|
+
proxy_set_header X-Real-IP $remote_addr;
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
---
|
|
586
|
+
|
|
587
|
+
### Database Read Replicas
|
|
588
|
+
|
|
589
|
+
For high read volume:
|
|
590
|
+
|
|
591
|
+
```typescript
|
|
592
|
+
// Primary for writes (NTTP doesn't write, but for context)
|
|
593
|
+
const primaryDb = knex({
|
|
594
|
+
client: 'pg',
|
|
595
|
+
connection: process.env.DATABASE_PRIMARY_URL
|
|
596
|
+
});
|
|
597
|
+
|
|
598
|
+
// Read replica for NTTP queries
|
|
599
|
+
const replicaDb = knex({
|
|
600
|
+
client: 'pg',
|
|
601
|
+
connection: process.env.DATABASE_REPLICA_URL
|
|
602
|
+
});
|
|
603
|
+
|
|
604
|
+
const nttp = new NTTP({
|
|
605
|
+
database: {
|
|
606
|
+
client: 'pg',
|
|
607
|
+
connection: process.env.DATABASE_REPLICA_URL // Use replica
|
|
608
|
+
},
|
|
609
|
+
// ... other config
|
|
610
|
+
});
|
|
611
|
+
```
|
|
612
|
+
|
|
613
|
+
---
|
|
614
|
+
|
|
615
|
+
## Cost Optimization
|
|
616
|
+
|
|
617
|
+
### Cache Hit Rate Monitoring
|
|
618
|
+
|
|
619
|
+
```typescript
|
|
620
|
+
async function analyzeCachePerformance() {
|
|
621
|
+
const queries = [];
|
|
622
|
+
|
|
623
|
+
// Replay the tracked queries (e.g. those collected over 1 hour) and record the cache layer and cost of each
|
|
624
|
+
for (const query of trackedQueries) {
|
|
625
|
+
const result = await nttp.query(query);
|
|
626
|
+
queries.push({
|
|
627
|
+
query,
|
|
628
|
+
cacheLayer: result.meta?.cacheLayer,
|
|
629
|
+
cost: result.meta?.cost
|
|
630
|
+
});
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// Calculate hit rates
|
|
634
|
+
const l1Hits = queries.filter(q => q.cacheLayer === 1).length;
|
|
635
|
+
const l2Hits = queries.filter(q => q.cacheLayer === 2).length;
|
|
636
|
+
const l3Misses = queries.filter(q => q.cacheLayer === 3).length;
|
|
637
|
+
const total = queries.length;
|
|
638
|
+
|
|
639
|
+
console.log({
|
|
640
|
+
l1HitRate: (l1Hits / total * 100).toFixed(2) + '%',
|
|
641
|
+
l2HitRate: (l2Hits / total * 100).toFixed(2) + '%',
|
|
642
|
+
l3MissRate: (l3Misses / total * 100).toFixed(2) + '%',
|
|
643
|
+
totalCost: queries.reduce((sum, q) => sum + q.cost, 0).toFixed(4)
|
|
644
|
+
});
|
|
645
|
+
}
|
|
646
|
+
```
|
|
647
|
+
|
|
648
|
+
---
|
|
649
|
+
|
|
650
|
+
### Cost Projections
|
|
651
|
+
|
|
652
|
+
```typescript
|
|
653
|
+
// Monthly cost estimation
|
|
654
|
+
function estimateMonthlyCost(queriesPerDay: number, cacheHitRate: number) {
|
|
655
|
+
const queriesPerMonth = queriesPerDay * 30;
|
|
656
|
+
const cacheMisses = queriesPerMonth * (1 - cacheHitRate);
|
|
657
|
+
const costPerQuery = 0.01; // Claude Sonnet
|
|
658
|
+
|
|
659
|
+
const llmCost = cacheMisses * costPerQuery;
|
|
660
|
+
const embeddingCost = queriesPerMonth * 0.0001; // L2 cache
|
|
661
|
+
|
|
662
|
+
return {
|
|
663
|
+
llmCost: llmCost.toFixed(2),
|
|
664
|
+
embeddingCost: embeddingCost.toFixed(2),
|
|
665
|
+
total: (llmCost + embeddingCost).toFixed(2)
|
|
666
|
+
};
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// Example: 10,000 queries/day, 85% cache hit rate
|
|
670
|
+
console.log(estimateMonthlyCost(10000, 0.85));
|
|
671
|
+
// { llmCost: '450.00', embeddingCost: '30.00', total: '480.00' }
|
|
672
|
+
```
|
|
673
|
+
|
|
674
|
+
---
|
|
675
|
+
|
|
676
|
+
## See Also
|
|
677
|
+
|
|
678
|
+
- [Configuration](./configuration.md) - Configuration reference
|
|
679
|
+
- [Caching](./caching.md) - Cache optimization
|
|
680
|
+
- [Troubleshooting](./troubleshooting.md) - Common issues
|
|
681
|
+
- [Examples](./examples.md) - Usage examples
|