nttp 1.4.11 → 1.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,681 @@
1
+ # Production Deployment Guide
2
+
3
+ Best practices for deploying NTTP in production environments.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Quick Checklist](#quick-checklist)
8
+ - [Infrastructure Setup](#infrastructure-setup)
9
+ - [Security](#security)
10
+ - [Performance Optimization](#performance-optimization)
11
+ - [Monitoring](#monitoring)
12
+ - [Error Handling](#error-handling)
13
+ - [Scaling](#scaling)
14
+ - [Cost Optimization](#cost-optimization)
15
+
16
+ ---
17
+
18
+ ## Quick Checklist
19
+
20
+ Before going to production:
21
+
22
+ - [ ] **Redis configured** for L1 cache persistence
23
+ - [ ] **Environment variables** secured (not in code)
24
+ - [ ] **Database connection pooling** enabled
25
+ - [ ] **Error logging** configured
26
+ - [ ] **Rate limiting** implemented
27
+ - [ ] **L2 semantic cache** enabled (if high query variation)
28
+ - [ ] **Cache monitoring** set up
29
+ - [ ] **Backup LLM provider** configured (optional)
30
+ - [ ] **Query length limits** enforced
31
+ - [ ] **Result size limits** configured
32
+
33
+ ---
34
+
35
+ ## Infrastructure Setup
36
+
37
+ ### Recommended Architecture
38
+
39
+ ```
40
+ ┌─────────────┐
41
+ │ Client │
42
+ └──────┬──────┘
43
+
44
+
45
+ ┌─────────────────────────────────────────┐
46
+ │ Application Server(s) │
47
+ │ ┌─────────────────────────────────┐ │
48
+ │ │ NTTP Instance │ │
49
+ │ └─────────────────────────────────┘ │
50
+ └──┬──────────┬──────────┬───────────────┘
51
+ │ │ │
52
+ ▼ ▼ ▼
53
+ ┌──────┐ ┌──────┐ ┌───────────┐
54
+ │ Redis│ │ DB │ │ LLM API │
55
+ │Cache │ │ │ │(Anthropic)│
56
+ └──────┘ └──────┘ └───────────┘
57
+ ```
58
+
59
+ ---
60
+
61
+ ### Required Services
62
+
63
+ **1. Redis (Required for production)**
64
+
65
+ ```bash
66
+ # Docker
67
+ docker run -d --name redis -p 6379:6379 redis:7-alpine
68
+
69
+ # Or use managed service
70
+ # - AWS ElastiCache
71
+ # - Redis Cloud
72
+ # - Upstash
73
+ ```
74
+
75
+ **2. Database**
76
+
77
+ ```bash
78
+ # Use connection pooling
79
+ DATABASE_URL=postgresql://user:pass@localhost:5432/db?pool_min=2&pool_max=10
80
+ ```
81
+
82
+ ---
83
+
84
+ ### Configuration
85
+
86
+ **Production `.env`:**
87
+
88
+ ```bash
89
+ # Database
90
+ DATABASE_TYPE=pg
91
+ DATABASE_URL=postgresql://user:pass@prod-db.example.com:5432/mydb
92
+
93
+ # LLM
94
+ LLM_PROVIDER=anthropic
95
+ LLM_MODEL=claude-sonnet-4-5-20250929
96
+ ANTHROPIC_API_KEY=sk-ant-production-key-here
97
+
98
+ # Cache (Redis is REQUIRED for production; the OpenAI key is used by the optional L2 semantic cache)
99
+ REDIS_URL=redis://:password@prod-redis.example.com:6379
100
+ OPENAI_API_KEY=sk-openai-key-for-l2-cache
101
+
102
+ # Limits
103
+ MAX_QUERY_LENGTH=300
104
+ DEFAULT_LIMIT=50
105
+ MAX_LIMIT=500
106
+ ```
107
+
108
+ ---
109
+
110
+ ### Docker Deployment
111
+
112
+ **Dockerfile:**
113
+
114
+ ```dockerfile
115
+ FROM node:20-alpine
116
+
117
+ WORKDIR /app
118
+
119
+ # Copy package files
120
+ COPY package*.json ./
121
+
122
+ # Install dependencies (devDependencies included: the build step below needs them)
122
+ RUN npm ci
124
+
125
+ # Copy application
126
+ COPY . .
127
+
128
+ # Build if using TypeScript
129
+ RUN npm run build
130
+
131
+ # Start application
132
+ CMD ["node", "dist/server.js"]
133
+ ```
134
+
135
+ **docker-compose.yml:**
136
+
137
+ ```yaml
138
+ version: '3.8'
139
+
140
+ services:
141
+ app:
142
+ build: .
143
+ ports:
144
+ - "3000:3000"
145
+ environment:
146
+ - DATABASE_URL=postgresql://postgres:password@db:5432/mydb
147
+ - REDIS_URL=redis://redis:6379
148
+ - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
149
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
150
+ depends_on:
151
+ - db
152
+ - redis
153
+
154
+ db:
155
+ image: postgres:15-alpine
156
+ environment:
157
+ - POSTGRES_PASSWORD=password
158
+ - POSTGRES_DB=mydb
159
+ volumes:
160
+ - pgdata:/var/lib/postgresql/data
161
+
162
+ redis:
163
+ image: redis:7-alpine
164
+ volumes:
165
+ - redisdata:/data
166
+
167
+ volumes:
168
+ pgdata:
169
+ redisdata:
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Security
175
+
176
+ ### Environment Variables
177
+
178
+ **❌ Never do this:**
179
+
180
+ ```typescript
181
+ // Hardcoded keys - DON'T DO THIS
182
+ const nttp = new NTTP({
183
+ llm: {
184
+ apiKey: 'sk-ant-1234567890' // ❌ WRONG
185
+ }
186
+ });
187
+ ```
188
+
189
+ **✅ Always do this:**
190
+
191
+ ```typescript
192
+ // Use environment variables
193
+ const nttp = new NTTP({
194
+ llm: {
195
+ apiKey: process.env.ANTHROPIC_API_KEY // ✅ CORRECT
196
+ }
197
+ });
198
+
199
+ // Or better, use fromEnv()
200
+ const nttp = await NTTP.fromEnv(); // ✅ BEST
201
+ ```
202
+
203
+ ---
204
+
205
+ ### Input Validation
206
+
207
+ ```typescript
208
+ import { z } from 'zod';
209
+
210
+ const querySchema = z.object({
211
+ query: z.string()
212
+ .min(1, 'Query cannot be empty')
213
+ .max(300, 'Query too long')
214
+ .regex(/^[a-zA-Z0-9\s,?!.]+$/, 'Invalid characters')
215
+ });
216
+
217
+ app.post('/api/query', async (req, res) => {
218
+ try {
219
+ // Validate input
220
+ const { query } = querySchema.parse(req.body);
221
+
222
+ const result = await nttp.query(query);
223
+ res.json({ data: result.data });
224
+ } catch (error) {
225
+ if (error instanceof z.ZodError) {
226
+ return res.status(400).json({ error: error.errors });
227
+ }
228
+ res.status(500).json({ error: 'Internal error' });
229
+ }
230
+ });
231
+ ```
232
+
233
+ ---
234
+
235
+ ### Rate Limiting
236
+
237
+ ```typescript
238
+ import rateLimit from 'express-rate-limit';
239
+
240
+ // Per-IP rate limiting
241
+ const limiter = rateLimit({
242
+ windowMs: 15 * 60 * 1000, // 15 minutes
243
+ max: 100, // 100 requests per window
244
+ message: 'Too many requests',
245
+ standardHeaders: true,
246
+ legacyHeaders: false
247
+ });
248
+
249
+ app.use('/api/query', limiter);
250
+
251
+ // Per-user rate limiting (if authenticated)
252
+ const userLimiter = rateLimit({
253
+ windowMs: 60 * 60 * 1000, // 1 hour
254
+ max: 500,
255
+ keyGenerator: (req) => req.user?.id || req.ip
256
+ });
257
+
258
+ app.use('/api/query', userLimiter);
259
+ ```
260
+
261
+ ---
262
+
263
+ ### Database Access Control
264
+
265
+ ```sql
266
+ -- Create read-only user for NTTP
267
+ CREATE ROLE nttp_readonly;
268
+ GRANT CONNECT ON DATABASE mydb TO nttp_readonly;
269
+ GRANT USAGE ON SCHEMA public TO nttp_readonly;
270
+ GRANT SELECT ON ALL TABLES IN SCHEMA public TO nttp_readonly;
271
+
272
+ -- Create specific user
273
+ CREATE USER nttp_app WITH PASSWORD 'secure_password';
274
+ GRANT nttp_readonly TO nttp_app;
275
+ ```
276
+
277
+ Then use this user in your connection:
278
+
279
+ ```bash
280
+ DATABASE_URL=postgresql://nttp_app:secure_password@localhost:5432/mydb
281
+ ```
282
+
283
+ ---
284
+
285
+ ## Performance Optimization
286
+
287
+ ### Cache Pre-warming
288
+
289
+ Warm the cache on startup by running common queries:
290
+
291
+ ```typescript
292
+ async function warmCache(nttp: NTTP) {
293
+ const commonQueries = [
294
+ "show active users",
295
+ "count pending orders",
296
+ "top 10 products by revenue",
297
+ "recent orders from last 7 days"
298
+ ];
299
+
300
+ console.log('Warming cache...');
301
+
302
+ for (const query of commonQueries) {
303
+ try {
304
+ await nttp.query(query);
305
+ console.log(`✓ Cached: ${query}`);
306
+ } catch (error) {
307
+ console.error(`✗ Failed: ${query}`, error.message);
308
+ }
309
+ }
310
+
311
+ console.log('Cache warming complete');
312
+ }
313
+
314
+ // On startup
315
+ const nttp = await NTTP.fromEnv();
316
+ await warmCache(nttp);
317
+ ```
318
+
319
+ ---
320
+
321
+ ### Connection Pooling
322
+
323
+ ```typescript
324
+ database: {
325
+ client: 'pg',
326
+ connection: {
327
+ host: process.env.DB_HOST,
328
+ database: process.env.DB_NAME,
329
+ user: process.env.DB_USER,
330
+ password: process.env.DB_PASSWORD,
331
+ },
331
+ // Connection pooling (a sibling of `connection`, not nested inside it)
332
+ pool: {
333
+ min: 2,
334
+ max: 10,
335
+ acquireTimeoutMillis: 30000,
336
+ idleTimeoutMillis: 30000
337
+ }
339
+ }
340
+ ```
341
+
342
+ ---
343
+
344
+ ### Result Size Limits
345
+
346
+ ```typescript
347
+ limits: {
348
+ maxQueryLength: 300, // Prevent very long queries
349
+ defaultLimit: 50, // Reasonable default
350
+ maxLimit: 500 // Prevent huge result sets
351
+ }
352
+ ```
353
+
354
+ ---
355
+
356
+ ### Timeouts
357
+
358
+ ```typescript
359
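+ // Stop waiting for a query after timeoutMs. Note: Promise.race does not cancel the underlying query, and the timer is not cleared if the query finishes first.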
360
+ async function queryWithTimeout(query: string, timeoutMs = 10000) {
361
+ const timeoutPromise = new Promise((_, reject) =>
362
+ setTimeout(() => reject(new Error('Query timeout')), timeoutMs)
363
+ );
364
+
365
+ return Promise.race([
366
+ nttp.query(query),
367
+ timeoutPromise
368
+ ]);
369
+ }
370
+ ```
371
+
372
+ ---
373
+
374
+ ## Monitoring
375
+
376
+ ### Cache Metrics
377
+
378
+ ```typescript
379
+ async function logCacheMetrics() {
380
+ const stats = await nttp.getCacheStats();
381
+
382
+ console.log('Cache Stats:', {
383
+ totalSchemas: stats.totalSchemas,
384
+ pinnedSchemas: stats.pinnedSchemas,
385
+ avgUseCount: stats.averageUseCount
386
+ });
387
+
388
+ // Log to monitoring service
389
+ metrics.gauge('nttp.cache.total_schemas', stats.totalSchemas);
390
+ metrics.gauge('nttp.cache.avg_use_count', stats.averageUseCount);
391
+ }
392
+
393
+ // Run periodically
394
+ setInterval(logCacheMetrics, 60000); // Every minute
395
+ ```
396
+
397
+ ---
398
+
399
+ ### Query Performance
400
+
401
+ ```typescript
402
+ async function monitoredQuery(query: string) {
403
+ const startTime = Date.now();
404
+
405
+ try {
406
+ const result = await nttp.query(query);
407
+ const duration = Date.now() - startTime;
408
+
409
+ // Log metrics
410
+ metrics.histogram('nttp.query.duration', duration, {
411
+ cacheHit: result.cacheHit,
412
+ cacheLayer: result.meta?.cacheLayer
413
+ });
414
+
415
+ metrics.counter('nttp.query.success', 1, {
416
+ cacheLayer: result.meta?.cacheLayer
417
+ });
418
+
419
+ return result;
420
+ } catch (error) {
421
+ const duration = Date.now() - startTime;
422
+
423
+ metrics.counter('nttp.query.error', 1, {
424
+ errorType: error.constructor.name
425
+ });
426
+
427
+ throw error;
428
+ }
429
+ }
430
+ ```
431
+
432
+ ---
433
+
434
+ ### Error Tracking
435
+
436
+ ```typescript
437
+ import * as Sentry from '@sentry/node';
438
+
439
+ try {
440
+ const result = await nttp.query(query);
441
+ } catch (error) {
442
+ // Log to Sentry with context
443
+ Sentry.captureException(error, {
444
+ tags: {
445
+ component: 'nttp',
446
+ errorType: error.constructor.name
447
+ },
448
+ extra: {
449
+ query,
450
+ suggestions: error.suggestions
451
+ }
452
+ });
453
+
454
+ throw error;
455
+ }
456
+ ```
457
+
458
+ ---
459
+
460
+ ### Health Checks
461
+
462
+ ```typescript
463
+ app.get('/health', async (req, res) => {
464
+ try {
465
+ // Check database
466
+ const tables = await nttp.getTables();
467
+
468
+ // Check Redis (if using)
469
+ // await redis.ping();
470
+
471
+ // Check LLM (optional - may be slow)
472
+ // const test = await nttp.explain("test query");
473
+
474
+ res.json({
475
+ status: 'healthy',
476
+ checks: {
477
+ database: 'ok',
478
+ cache: 'ok',
479
+ llm: 'ok'
480
+ }
481
+ });
482
+ } catch (error) {
483
+ res.status(503).json({
484
+ status: 'unhealthy',
485
+ error: error.message
486
+ });
487
+ }
488
+ });
489
+ ```
490
+
491
+ ---
492
+
493
+ ## Error Handling
494
+
495
+ ### Graceful Degradation
496
+
497
+ ```typescript
498
+ async function resilientQuery(query: string) {
499
+ try {
500
+ // Try primary LLM
501
+ return await nttp.query(query);
502
+ } catch (error) {
503
+ if (error instanceof LLMError) {
504
+ // LLM failed - try backup provider
505
+ console.error('Primary LLM failed, trying backup...');
506
+
507
+ // Could switch to backup NTTP instance with different provider
508
+ return await backupNTTP.query(query);
509
+ }
510
+
511
+ throw error;
512
+ }
513
+ }
514
+ ```
515
+
516
+ ---
517
+
518
+ ### Retry Logic
519
+
520
+ ```typescript
521
+ async function queryWithRetry(
522
+ query: string,
523
+ maxRetries = 3,
524
+ backoff = 1000
525
+ ) {
526
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
527
+ try {
528
+ return await nttp.query(query);
529
+ } catch (error) {
530
+ if (attempt === maxRetries) throw error;
531
+
532
+ // Exponential backoff
533
+ const delay = backoff * Math.pow(2, attempt - 1);
534
+ console.log(`Retry ${attempt}/${maxRetries} after ${delay}ms...`);
535
+ await new Promise(resolve => setTimeout(resolve, delay));
536
+ }
537
+ }
538
+ }
539
+ ```
540
+
541
+ ---
542
+
543
+ ## Scaling
544
+
545
+ ### Horizontal Scaling
546
+
547
+ **Multiple app instances with shared cache:**
548
+
549
+ ```
550
+ ┌────────────┐
551
+ │ Instance 1 │───┐
552
+ └────────────┘ │
553
+ ├──→ Redis ──→ Database
554
+ ┌────────────┐ │
555
+ │ Instance 2 │───┘
556
+ └────────────┘
557
+ ```
558
+
559
+ All instances share the same Redis cache, so L1 hits work across instances.
560
+
561
+ ---
562
+
563
+ ### Load Balancing
564
+
565
+ ```nginx
566
+ # nginx.conf
567
+ upstream nttp_backend {
568
+ least_conn; # Route to least busy server
569
+ server app1:3000;
570
+ server app2:3000;
571
+ server app3:3000;
572
+ }
573
+
574
+ server {
575
+ listen 80;
576
+
577
+ location /api/query {
578
+ proxy_pass http://nttp_backend;
579
+ proxy_set_header Host $host;
580
+ proxy_set_header X-Real-IP $remote_addr;
581
+ }
582
+ }
583
+ ```
584
+
585
+ ---
586
+
587
+ ### Database Read Replicas
588
+
589
+ For high read volume:
590
+
591
+ ```typescript
592
+ // Primary for writes (NTTP doesn't write, but for context)
593
+ const primaryDb = knex({
594
+ client: 'pg',
595
+ connection: process.env.DATABASE_PRIMARY_URL
596
+ });
597
+
598
+ // Read replica for NTTP queries
599
+ const replicaDb = knex({
600
+ client: 'pg',
601
+ connection: process.env.DATABASE_REPLICA_URL
602
+ });
603
+
604
+ const nttp = new NTTP({
605
+ database: {
606
+ client: 'pg',
607
+ connection: process.env.DATABASE_REPLICA_URL // Use replica
608
+ },
609
+ // ... other config
610
+ });
611
+ ```
612
+
613
+ ---
614
+
615
+ ## Cost Optimization
616
+
617
+ ### Cache Hit Rate Monitoring
618
+
619
+ ```typescript
620
+ async function analyzeCachePerformance() {
621
+ const queries = [];
622
+
623
+ // Run each tracked query and record which cache layer served it
624
+ for (const query of trackedQueries) {
625
+ const result = await nttp.query(query);
626
+ queries.push({
627
+ query,
628
+ cacheLayer: result.meta?.cacheLayer,
629
+ cost: result.meta?.cost
630
+ });
631
+ }
632
+
633
+ // Calculate hit rates
634
+ const l1Hits = queries.filter(q => q.cacheLayer === 1).length;
635
+ const l2Hits = queries.filter(q => q.cacheLayer === 2).length;
636
+ const l3Misses = queries.filter(q => q.cacheLayer === 3).length;
637
+ const total = queries.length;
638
+
639
+ console.log({
640
+ l1HitRate: (l1Hits / total * 100).toFixed(2) + '%',
641
+ l2HitRate: (l2Hits / total * 100).toFixed(2) + '%',
642
+ l3MissRate: (l3Misses / total * 100).toFixed(2) + '%',
643
+ totalCost: queries.reduce((sum, q) => sum + (q.cost ?? 0), 0).toFixed(4)
644
+ });
645
+ }
646
+ ```
647
+
648
+ ---
649
+
650
+ ### Cost Projections
651
+
652
+ ```typescript
653
+ // Monthly cost estimation
654
+ function estimateMonthlyCost(queriesPerDay: number, cacheHitRate: number) {
655
+ const queriesPerMonth = queriesPerDay * 30;
656
+ const cacheMisses = queriesPerMonth * (1 - cacheHitRate);
657
+ const costPerQuery = 0.01; // Claude Sonnet
658
+
659
+ const llmCost = cacheMisses * costPerQuery;
660
+ const embeddingCost = queriesPerMonth * 0.0001; // L2 cache
661
+
662
+ return {
663
+ llmCost: llmCost.toFixed(2),
664
+ embeddingCost: embeddingCost.toFixed(2),
665
+ total: (llmCost + embeddingCost).toFixed(2)
666
+ };
667
+ }
668
+
669
+ // Example: 10,000 queries/day, 85% cache hit rate
670
+ console.log(estimateMonthlyCost(10000, 0.85));
671
+ // { llmCost: '450.00', embeddingCost: '30.00', total: '480.00' }
672
+ ```
673
+
674
+ ---
675
+
676
+ ## See Also
677
+
678
+ - [Configuration](./configuration.md) - Configuration reference
679
+ - [Caching](./caching.md) - Cache optimization
680
+ - [Troubleshooting](./troubleshooting.md) - Common issues
681
+ - [Examples](./examples.md) - Usage examples