@ancatag/n-r 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +536 -115
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,10 +1,29 @@
|
|
|
1
|
-
#
|
|
1
|
+
# @ancatag/n-r
|
|
2
2
|
|
|
3
|
-
Official Node.js/TypeScript SDK for Nova-route AI API
|
|
3
|
+
> **Official Node.js/TypeScript SDK for Nova-route AI API**
|
|
4
|
+
> Save 60-80% on AI token costs with intelligent multi-tier caching and an OpenAI-compatible interface.
|
|
5
|
+
|
|
6
|
+
[npm package](https://www.npmjs.com/package/@ancatag/n-r)
|
|
7
|
+
[License: ISC](https://opensource.org/licenses/ISC)
|
|
8
|
+
[Node.js](https://nodejs.org/)
|
|
9
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
10
|
+
|
|
11
|
+
**Nova-route** is an AI infrastructure platform that reduces your AI API costs by 60-80% through intelligent caching, semantic similarity matching, and RAG optimization. This SDK provides a drop-in replacement for OpenAI with automatic cost savings.
|
|
12
|
+
|
|
13
|
+
## Features
|
|
14
|
+
|
|
15
|
+
- 💰 **60-80% Token Cost Reduction** - Multi-tier caching (hot + semantic) automatically saves on redundant API calls
|
|
16
|
+
- 🔄 **OpenAI-Compatible API** - Drop-in replacement, just change your base URL
|
|
17
|
+
- ⚡ **Dual Transport** - REST (default) and gRPC (lower overhead) support
|
|
18
|
+
- 🌊 **Streaming Support** - Real-time response streaming with cancellation
|
|
19
|
+
- 🧠 **RAG Integration** - Retrieval-Augmented Generation for document-based AI
|
|
20
|
+
- 🎯 **Smart Routing** - Automatic model selection and route configuration
|
|
21
|
+
- 📊 **Cache Analytics** - Track savings, hit rates, and token usage
|
|
22
|
+
- 🔒 **TypeScript First** - Full type safety with exported types
|
|
23
|
+
- 🚀 **Zero Configuration** - Works out of the box with sensible defaults
|
|
4
24
|
|
|
5
25
|
## Installation
|
|
6
26
|
|
|
7
|
-
### From npm (when published)
|
|
8
27
|
```bash
|
|
9
28
|
npm install @ancatag/n-r
|
|
10
29
|
# or
|
|
@@ -13,54 +32,21 @@ pnpm add @ancatag/n-r
|
|
|
13
32
|
yarn add @ancatag/n-r
|
|
14
33
|
```
|
|
15
34
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
For local development and testing within the Nova monorepo or OpenChat app:
|
|
19
|
-
|
|
20
|
-
```bash
|
|
21
|
-
# In Nova repo
|
|
22
|
-
cd packages/sdk
|
|
23
|
-
pnpm build
|
|
24
|
-
|
|
25
|
-
# Link the package
|
|
26
|
-
pnpm link --global
|
|
27
|
-
|
|
28
|
-
# In OpenChat repo (or any project)
|
|
29
|
-
pnpm link --global @ancatag/n-r
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
Or use pnpm workspace (if both repos are in a workspace):
|
|
33
|
-
```bash
|
|
34
|
-
# In OpenChat package.json, add:
|
|
35
|
-
{
|
|
36
|
-
"dependencies": {
|
|
37
|
-
"@ancatag/n-r": "workspace:*"
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
### Using from OpenChat (local)
|
|
43
|
-
|
|
44
|
-
- Point the SDK to your local Nova API: `baseUrl: 'http://localhost:3000'` (REST) or `grpcUrl: '0.0.0.0:50051'` with `transport: 'grpc'`.
|
|
45
|
-
- Use a Nova-route API key (`nova_sk_...`) scoped to the project/model configs you need.
|
|
46
|
-
- Prefer workspace linking (above) or `pnpm link --global @ancatag/n-r` while developing both repos.
|
|
47
|
-
|
|
35
|
+
**Requirements:** Node.js 18+ (uses native `fetch`)
|
|
48
36
|
|
|
49
37
|
## Quick Start
|
|
50
38
|
|
|
39
|
+
### Basic Chat Completion
|
|
40
|
+
|
|
51
41
|
```typescript
|
|
52
42
|
import { NovaClient } from '@ancatag/n-r';
|
|
53
43
|
|
|
54
|
-
// Initialize client with API key from Nova-route UI
|
|
55
44
|
const client = new NovaClient({
|
|
56
|
-
apiKey: process.env.NOVA_API_KEY || 'nova_sk_...',
|
|
57
|
-
transport: process.env.NOVA_TRANSPORT || 'rest',
|
|
58
|
-
timeoutMs: 60000, // Optional, defaults to 60s
|
|
45
|
+
apiKey: process.env.NOVA_API_KEY || 'nova_sk_...',
|
|
59
46
|
});
|
|
60
47
|
|
|
61
|
-
// Non-streaming chat completion
|
|
62
48
|
const response = await client.chat.create({
|
|
63
|
-
model: 'llama2',
|
|
49
|
+
model: 'llama2', // or any model configured in your project
|
|
64
50
|
messages: [
|
|
65
51
|
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
66
52
|
{ role: 'user', content: 'Hello!' }
|
|
@@ -72,27 +58,24 @@ const response = await client.chat.create({
|
|
|
72
58
|
console.log(response.choices[0].message.content);
|
|
73
59
|
console.log('Tokens used:', response.usage.total_tokens);
|
|
74
60
|
console.log('Cache hit:', response.nova?.cacheHit);
|
|
61
|
+
console.log('Tokens saved:', response.nova?.tokensSaved);
|
|
75
62
|
```
|
|
76
63
|
|
|
77
|
-
|
|
64
|
+
### Streaming
|
|
78
65
|
|
|
79
66
|
```typescript
|
|
80
|
-
// Streaming chat completion
|
|
81
67
|
const stream = client.chat.createStream({
|
|
82
68
|
model: 'llama2',
|
|
83
69
|
messages: [{ role: 'user', content: 'Tell me a story' }],
|
|
84
70
|
temperature: 0.7,
|
|
85
71
|
});
|
|
86
72
|
|
|
87
|
-
let fullContent = '';
|
|
88
73
|
for await (const chunk of stream) {
|
|
89
74
|
const content = chunk.choices[0]?.delta?.content || '';
|
|
90
75
|
if (content) {
|
|
91
76
|
process.stdout.write(content); // Print as it arrives
|
|
92
|
-
fullContent += content;
|
|
93
77
|
}
|
|
94
78
|
|
|
95
|
-
// Check if stream is complete
|
|
96
79
|
if (chunk.choices[0]?.finish_reason) {
|
|
97
80
|
console.log('\n\nStream complete!');
|
|
98
81
|
break;
|
|
@@ -100,7 +83,44 @@ for await (const chunk of stream) {
|
|
|
100
83
|
}
|
|
101
84
|
```
|
|
102
85
|
|
|
103
|
-
##
|
|
86
|
+
## Usage Examples
|
|
87
|
+
|
|
88
|
+
### Non-Streaming Chat Completion
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
import { NovaClient } from '@ancatag/n-r';
|
|
92
|
+
|
|
93
|
+
const client = new NovaClient({
|
|
94
|
+
apiKey: 'nova_sk_...',
|
|
95
|
+
baseUrl: 'https://api.nova.ai', // Optional: defaults to http://localhost:3000
|
|
96
|
+
timeoutMs: 60000, // Optional: request timeout (default: 60000)
|
|
97
|
+
maxRetries: 3, // Optional: max retries (default: 2)
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
const response = await client.chat.create({
|
|
101
|
+
model: 'gpt-4', // Uses project default if not specified
|
|
102
|
+
messages: [
|
|
103
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
104
|
+
{ role: 'user', content: 'Explain quantum computing in simple terms' }
|
|
105
|
+
],
|
|
106
|
+
temperature: 0.7,
|
|
107
|
+
max_tokens: 1000,
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
// Access response
|
|
111
|
+
console.log(response.choices[0].message.content);
|
|
112
|
+
|
|
113
|
+
// Access Nova-specific metrics
|
|
114
|
+
if (response.nova) {
|
|
115
|
+
console.log('Cache hit:', response.nova.cacheHit);
|
|
116
|
+
console.log('Cache layer:', response.nova.cacheLayer); // 'hot' | 'semantic' | null
|
|
117
|
+
console.log('Tokens saved:', response.nova.tokensSaved);
|
|
118
|
+
console.log('Response time:', response.nova.responseTimeMs, 'ms');
|
|
119
|
+
console.log('Request ID:', response.nova.requestId);
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Streaming with Cancellation
|
|
104
124
|
|
|
105
125
|
```typescript
|
|
106
126
|
const controller = new AbortController();
|
|
@@ -112,11 +132,11 @@ try {
|
|
|
112
132
|
for await (const chunk of client.chat.createStream(
|
|
113
133
|
{
|
|
114
134
|
model: 'llama2',
|
|
115
|
-
messages: [{ role: 'user', content: '
|
|
135
|
+
messages: [{ role: 'user', content: 'Write a long story' }],
|
|
116
136
|
},
|
|
117
137
|
controller.signal
|
|
118
138
|
)) {
|
|
119
|
-
|
|
139
|
+
process.stdout.write(chunk.choices[0]?.delta?.content || '');
|
|
120
140
|
}
|
|
121
141
|
} catch (error) {
|
|
122
142
|
if (error.name === 'AbortError') {
|
|
@@ -125,10 +145,10 @@ try {
|
|
|
125
145
|
}
|
|
126
146
|
```
|
|
127
147
|
|
|
128
|
-
|
|
148
|
+
### Models API
|
|
129
149
|
|
|
130
150
|
```typescript
|
|
131
|
-
// List all available models
|
|
151
|
+
// List all available models in your project
|
|
132
152
|
const models = await client.models.list();
|
|
133
153
|
console.log('Available models:', models.map(m => m.id));
|
|
134
154
|
|
|
@@ -137,10 +157,10 @@ const model = await client.models.get('llama2');
|
|
|
137
157
|
console.log('Model:', model);
|
|
138
158
|
```
|
|
139
159
|
|
|
140
|
-
|
|
160
|
+
### Error Handling
|
|
141
161
|
|
|
142
162
|
```typescript
|
|
143
|
-
import { NovaClient, NovaError } from '@
|
|
163
|
+
import { NovaClient, NovaError } from '@ancatag/n-r';
|
|
144
164
|
|
|
145
165
|
const client = new NovaClient({
|
|
146
166
|
apiKey: 'nova_sk_...',
|
|
@@ -157,6 +177,19 @@ try {
|
|
|
157
177
|
console.error('Status:', error.status);
|
|
158
178
|
console.error('Code:', error.code);
|
|
159
179
|
console.error('Type:', error.type);
|
|
180
|
+
|
|
181
|
+
// Handle specific error codes
|
|
182
|
+
switch (error.code) {
|
|
183
|
+
case 'invalid_api_key':
|
|
184
|
+
console.error('Invalid API key');
|
|
185
|
+
break;
|
|
186
|
+
case 'model_not_found':
|
|
187
|
+
console.error('Model not found or not accessible');
|
|
188
|
+
break;
|
|
189
|
+
case 'rate_limit_exceeded':
|
|
190
|
+
console.error('Rate limit exceeded');
|
|
191
|
+
break;
|
|
192
|
+
}
|
|
160
193
|
} else {
|
|
161
194
|
console.error('Unexpected error:', error);
|
|
162
195
|
}
|
|
@@ -165,100 +198,205 @@ try {
|
|
|
165
198
|
|
|
166
199
|
## Nova-Specific Features
|
|
167
200
|
|
|
168
|
-
Nova extends the OpenAI API with
|
|
201
|
+
Nova extends the OpenAI API with powerful features for cost optimization and advanced routing:
|
|
202
|
+
|
|
203
|
+
### Cache Control
|
|
169
204
|
|
|
170
205
|
```typescript
|
|
171
206
|
const response = await client.chat.create({
|
|
172
207
|
model: 'llama2',
|
|
173
208
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
174
209
|
|
|
175
|
-
// Nova-
|
|
210
|
+
// Nova-specific options
|
|
176
211
|
nova: {
|
|
177
|
-
skipCache: false, // Skip cache for this request
|
|
178
|
-
routeConfigId: 'uuid', // Route config ID (specifies which route configuration to use)
|
|
179
|
-
ragEnabled: true, // Enable RAG (if configured)
|
|
180
|
-
metadata: { userId: '123' }, // Custom metadata
|
|
181
|
-
systemPromptOverride: 'Custom system prompt', // Override system prompt
|
|
212
|
+
skipCache: false, // Set to true to bypass the cache lookup for this request (default: false)
|
|
182
213
|
},
|
|
183
214
|
});
|
|
184
215
|
|
|
185
|
-
// Response includes
|
|
216
|
+
// Response includes cache information
|
|
186
217
|
console.log('Cache hit:', response.nova?.cacheHit);
|
|
187
218
|
console.log('Cache layer:', response.nova?.cacheLayer); // 'hot' | 'semantic' | null
|
|
188
219
|
console.log('Tokens saved:', response.nova?.tokensSaved);
|
|
189
|
-
console.log('Response time:', response.nova?.responseTimeMs, 'ms');
|
|
190
|
-
console.log('Request ID:', response.nova?.requestId);
|
|
191
220
|
```
|
|
192
221
|
|
|
193
|
-
|
|
222
|
+
### Route Configuration
|
|
194
223
|
|
|
195
224
|
```typescript
|
|
196
|
-
const
|
|
197
|
-
|
|
225
|
+
const response = await client.chat.create({
|
|
226
|
+
model: 'llama2',
|
|
227
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
198
228
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
229
|
+
nova: {
|
|
230
|
+
routeConfigId: 'route-config-uuid', // Use specific route configuration
|
|
231
|
+
},
|
|
202
232
|
});
|
|
203
233
|
```
|
|
204
234
|
|
|
205
|
-
|
|
235
|
+
### RAG (Retrieval-Augmented Generation)
|
|
206
236
|
|
|
207
|
-
|
|
208
|
-
2. Navigate to API Keys section
|
|
209
|
-
3. Create a new API key
|
|
210
|
-
4. Copy the key (format: `nova_sk_...`)
|
|
211
|
-
5. Use it in your SDK initialization
|
|
237
|
+
RAG enables AI models to answer questions using your own documents as context. Instead of sending entire documents with every request, Nova-route automatically retrieves only the relevant chunks that match your query, dramatically reducing token usage (70-90% savings) while improving accuracy.
|
|
212
238
|
|
|
213
|
-
|
|
239
|
+
**How RAG Works:**
|
|
240
|
+
1. Upload documents (PDF, TXT, MD) to a route config via REST API
|
|
241
|
+
2. Documents are automatically parsed, chunked, and embedded
|
|
242
|
+
3. During chat completions, relevant chunks are automatically retrieved and injected as context
|
|
243
|
+
4. Only chunks that fit within your token budget are included
|
|
244
|
+
|
|
245
|
+
**Using RAG with the SDK:**
|
|
214
246
|
|
|
215
|
-
|
|
247
|
+
RAG works automatically once documents are uploaded and processed. Simply use a route config that has RAG enabled:
|
|
216
248
|
|
|
217
249
|
```typescript
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
250
|
+
// RAG is automatic - no code changes needed!
|
|
251
|
+
const response = await client.chat.create({
|
|
252
|
+
model: 'your-route-config-id', // Route config with ragEnabled: true
|
|
253
|
+
messages: [
|
|
254
|
+
{ role: 'user', content: 'What is the vacation policy?' }
|
|
255
|
+
],
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Response includes context from your uploaded documents
|
|
259
|
+
console.log(response.choices[0].message.content);
|
|
226
260
|
```
|
|
227
261
|
|
|
228
|
-
|
|
262
|
+
**Document Upload (via REST API):**
|
|
229
263
|
|
|
230
|
-
|
|
231
|
-
- TypeScript 5.0+ (for type definitions)
|
|
264
|
+
Document upload is done via the REST API (not SDK methods). Here's a complete example:
|
|
232
265
|
|
|
233
|
-
|
|
266
|
+
```typescript
|
|
267
|
+
const API_BASE_URL = 'https://api.nova.ai';
|
|
268
|
+
const JWT_TOKEN = 'your-jwt-token'; // From /auth/login
|
|
269
|
+
const ROUTE_CONFIG_ID = 'your-route-config-id';
|
|
234
270
|
|
|
235
|
-
|
|
271
|
+
// 1. Upload document
|
|
272
|
+
async function uploadDocument(file: File) {
|
|
273
|
+
const formData = new FormData();
|
|
274
|
+
formData.append('file', file);
|
|
236
275
|
|
|
276
|
+
const response = await fetch(
|
|
277
|
+
`${API_BASE_URL}/rag/collections/${ROUTE_CONFIG_ID}/documents`,
|
|
278
|
+
{
|
|
279
|
+
method: 'POST',
|
|
280
|
+
headers: {
|
|
281
|
+
'Authorization': `Bearer ${JWT_TOKEN}`,
|
|
282
|
+
},
|
|
283
|
+
body: formData,
|
|
284
|
+
}
|
|
285
|
+
);
|
|
286
|
+
|
|
287
|
+
const document = await response.json();
|
|
288
|
+
console.log('Document uploaded:', document.id);
|
|
289
|
+
|
|
290
|
+
// Poll for processing completion
|
|
291
|
+
return pollDocumentStatus(document.id);
|
|
292
|
+
}
|
|
237
293
|
|
|
294
|
+
// 2. Check processing status
|
|
295
|
+
async function pollDocumentStatus(documentId: string) {
|
|
296
|
+
const maxAttempts = 30;
|
|
297
|
+
const delayMs = 2000;
|
|
298
|
+
|
|
299
|
+
for (let i = 0; i < maxAttempts; i++) {
|
|
300
|
+
const response = await fetch(
|
|
301
|
+
`${API_BASE_URL}/rag/documents/${documentId}`,
|
|
302
|
+
{
|
|
303
|
+
headers: {
|
|
304
|
+
'Authorization': `Bearer ${JWT_TOKEN}`,
|
|
305
|
+
},
|
|
306
|
+
}
|
|
307
|
+
);
|
|
308
|
+
|
|
309
|
+
const document = await response.json();
|
|
310
|
+
|
|
311
|
+
if (document.status === 'completed') {
|
|
312
|
+
console.log('Document processed! Chunks:', document.chunkCount);
|
|
313
|
+
return document;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
if (document.status === 'failed') {
|
|
317
|
+
throw new Error(`Processing failed: ${document.errorMessage}`);
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
await new Promise(resolve => setTimeout(resolve, delayMs));
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
throw new Error('Document processing timeout');
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// 3. Use RAG in chat completions (automatic)
|
|
327
|
+
const response = await client.chat.create({
|
|
328
|
+
model: ROUTE_CONFIG_ID, // Route config with ragEnabled: true
|
|
329
|
+
messages: [
|
|
330
|
+
{ role: 'user', content: 'What is the vacation policy?' }
|
|
331
|
+
],
|
|
332
|
+
});
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
**Token Savings with RAG:**
|
|
336
|
+
- **Without RAG**: Send entire documents (10,000+ tokens)
|
|
337
|
+
- **With RAG**: Only relevant chunks (500-2,000 tokens)
|
|
338
|
+
- **Savings**: 70-90% reduction in prompt tokens
|
|
339
|
+
|
|
340
|
+
**RAG Configuration:**
|
|
238
341
|
|
|
342
|
+
RAG settings are configured per route config:
|
|
343
|
+
- `chunkSize`: 100-2048 tokens per chunk (default: 512)
|
|
344
|
+
- `chunkOverlap`: 0-200 tokens overlap (default: 50)
|
|
345
|
+
- `topK`: 1-20 chunks to retrieve (default: 5)
|
|
346
|
+
- `similarityThreshold`: 0.5-0.95 minimum similarity (default: 0.7)
|
|
239
347
|
|
|
348
|
+
**See [RAG SDK Documentation](https://github.com/sayanmohsin/nova-route/blob/main/docs/RAG_SDK.md) for complete details.**
|
|
349
|
+
|
|
350
|
+
### Custom Metadata
|
|
351
|
+
|
|
352
|
+
```typescript
|
|
353
|
+
const response = await client.chat.create({
|
|
354
|
+
model: 'llama2',
|
|
355
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
356
|
+
|
|
357
|
+
nova: {
|
|
358
|
+
metadata: {
|
|
359
|
+
userId: '123',
|
|
360
|
+
sessionId: 'abc',
|
|
361
|
+
feature: 'chatbot',
|
|
362
|
+
},
|
|
363
|
+
},
|
|
364
|
+
});
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
### System Prompt Override
|
|
368
|
+
|
|
369
|
+
```typescript
|
|
370
|
+
const response = await client.chat.create({
|
|
371
|
+
model: 'llama2',
|
|
372
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
373
|
+
|
|
374
|
+
nova: {
|
|
375
|
+
systemPromptOverride: 'You are a specialized technical assistant.',
|
|
376
|
+
},
|
|
377
|
+
});
|
|
378
|
+
```
|
|
240
379
|
|
|
241
380
|
## gRPC Transport
|
|
242
381
|
|
|
243
|
-
|
|
382
|
+
For lower overhead and better performance, use gRPC transport:
|
|
383
|
+
|
|
244
384
|
```typescript
|
|
245
|
-
import { NovaClient } from '@
|
|
385
|
+
import { NovaClient } from '@ancatag/n-r';
|
|
246
386
|
|
|
247
387
|
const client = new NovaClient({
|
|
248
388
|
apiKey: process.env.NOVA_API_KEY || 'nova_sk_...',
|
|
249
|
-
transport: 'grpc',
|
|
389
|
+
transport: 'grpc', // Use gRPC instead of REST
|
|
390
|
+
grpcUrl: '0.0.0.0:50051', // Optional: defaults to 0.0.0.0:50051
|
|
250
391
|
});
|
|
251
392
|
|
|
252
|
-
|
|
393
|
+
// Same API, lower overhead
|
|
394
|
+
const response = await client.chat.create({
|
|
253
395
|
model: 'llama2',
|
|
254
396
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
255
397
|
});
|
|
256
|
-
console.log(res.choices[0].message.content);
|
|
257
|
-
```
|
|
258
|
-
|
|
259
|
-
Streaming works the same via `createStream`.
|
|
260
398
|
|
|
261
|
-
|
|
399
|
+
// Streaming also works with gRPC
|
|
262
400
|
for await (const chunk of client.chat.createStream({
|
|
263
401
|
model: 'llama2',
|
|
264
402
|
messages: [{ role: 'user', content: 'Hey' }],
|
|
@@ -267,19 +405,302 @@ for await (const chunk of client.chat.createStream({
|
|
|
267
405
|
}
|
|
268
406
|
```
|
|
269
407
|
|
|
270
|
-
Transport
|
|
271
|
-
- `transport: 'rest'` (default)
|
|
272
|
-
- `transport: 'grpc'`
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
408
|
+
**Transport Options:**
|
|
409
|
+
- `transport: 'rest'` (default) - Uses HTTP fetch
|
|
410
|
+
- `transport: 'grpc'` - Uses gRPC over grpc-js with ts-proto stubs
|
|
411
|
+
|
|
412
|
+
## API Reference
|
|
413
|
+
|
|
414
|
+
### Client Configuration
|
|
415
|
+
|
|
416
|
+
```typescript
|
|
417
|
+
interface NovaClientConfig {
|
|
418
|
+
/** Nova API key (required, format: nova_sk_...) */
|
|
419
|
+
apiKey: string;
|
|
420
|
+
/** Base URL for REST API (default: http://localhost:3000) */
|
|
421
|
+
baseUrl?: string;
|
|
422
|
+
/** gRPC URL (default: 0.0.0.0:50051) */
|
|
423
|
+
grpcUrl?: string;
|
|
424
|
+
/** Preferred transport: 'rest' | 'grpc' (default: 'rest') */
|
|
425
|
+
transport?: 'rest' | 'grpc';
|
|
426
|
+
/** Request timeout in milliseconds (default: 60000) */
|
|
427
|
+
timeoutMs?: number;
|
|
428
|
+
/** Maximum number of retries for failed requests (default: 2) */
|
|
429
|
+
maxRetries?: number;
|
|
430
|
+
}
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
### Chat Completions
|
|
434
|
+
|
|
435
|
+
#### `client.chat.create(request)`
|
|
436
|
+
|
|
437
|
+
Create a non-streaming chat completion.
|
|
438
|
+
|
|
439
|
+
**Parameters:**
|
|
440
|
+
- `request: ChatCompletionRequest` - Chat completion request (OpenAI-compatible)
|
|
441
|
+
|
|
442
|
+
**Returns:** `Promise<ChatCompletionResponse>`
|
|
443
|
+
|
|
444
|
+
#### `client.chat.createStream(request, signal?)`
|
|
445
|
+
|
|
446
|
+
Create a streaming chat completion.
|
|
447
|
+
|
|
448
|
+
**Parameters:**
|
|
449
|
+
- `request: ChatCompletionRequest` - Chat completion request
|
|
450
|
+
- `signal?: AbortSignal` - Optional abort signal for cancellation
|
|
451
|
+
|
|
452
|
+
**Returns:** `AsyncIterable<ChatCompletionChunk>`
|
|
453
|
+
|
|
454
|
+
### Models
|
|
455
|
+
|
|
456
|
+
#### `client.models.list()`
|
|
457
|
+
|
|
458
|
+
List all available models in your project.
|
|
459
|
+
|
|
460
|
+
**Returns:** `Promise<Model[]>`
|
|
461
|
+
|
|
462
|
+
#### `client.models.get(modelId)`
|
|
463
|
+
|
|
464
|
+
Get details for a specific model.
|
|
465
|
+
|
|
466
|
+
**Parameters:**
|
|
467
|
+
- `modelId: string` - Model identifier
|
|
468
|
+
|
|
469
|
+
**Returns:** `Promise<Model>`
|
|
470
|
+
|
|
471
|
+
### Type Exports
|
|
472
|
+
|
|
473
|
+
```typescript
|
|
474
|
+
import type {
|
|
475
|
+
ChatMessage,
|
|
476
|
+
ChatCompletionRequest,
|
|
477
|
+
ChatCompletionResponse,
|
|
478
|
+
ChatCompletionChunk,
|
|
479
|
+
ChatCompletionChoice,
|
|
480
|
+
ChatCompletionUsage,
|
|
481
|
+
Model,
|
|
482
|
+
NovaClientConfig,
|
|
483
|
+
NovaTransport,
|
|
484
|
+
} from '@ancatag/n-r';
|
|
485
|
+
|
|
486
|
+
import { NovaError } from '@ancatag/n-r';
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
### Nova-Specific Extensions
|
|
490
|
+
|
|
491
|
+
#### Request Extensions
|
|
492
|
+
|
|
493
|
+
```typescript
|
|
494
|
+
interface NovaRequestExtensions {
|
|
495
|
+
nova?: {
|
|
496
|
+
/** Skip cache lookup for this request */
|
|
497
|
+
skipCache?: boolean;
|
|
498
|
+
/** Route config ID - specifies which route configuration to use */
|
|
499
|
+
routeConfigId?: string;
|
|
500
|
+
/** Enable RAG (Retrieval-Augmented Generation) for this request */
|
|
501
|
+
ragEnabled?: boolean;
|
|
502
|
+
/** Additional metadata to attach to the request */
|
|
503
|
+
metadata?: Record<string, any>;
|
|
504
|
+
/** Override the system prompt for this request */
|
|
505
|
+
systemPromptOverride?: string;
|
|
506
|
+
};
|
|
507
|
+
}
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
#### Response Extensions
|
|
511
|
+
|
|
512
|
+
```typescript
|
|
513
|
+
interface NovaResponseExtensions {
|
|
514
|
+
nova?: {
|
|
515
|
+
/** Whether this response was served from cache */
|
|
516
|
+
cacheHit: boolean;
|
|
517
|
+
/** Cache layer used: 'hot' (exact match) | 'semantic' (similarity match) | null */
|
|
518
|
+
cacheLayer?: 'hot' | 'semantic' | null;
|
|
519
|
+
/** Number of tokens saved by cache hit */
|
|
520
|
+
tokensSaved: number;
|
|
521
|
+
/** Response time in milliseconds */
|
|
522
|
+
responseTimeMs: number;
|
|
523
|
+
/** Unique request ID for tracking */
|
|
524
|
+
requestId: string;
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
## Advanced Features
|
|
530
|
+
|
|
531
|
+
### Multi-Tier Caching
|
|
532
|
+
|
|
533
|
+
Nova automatically uses two cache layers:
|
|
534
|
+
|
|
535
|
+
1. **Hot Cache** - Exact match caching (7-30 day TTL)
|
|
536
|
+
- Instant responses for identical requests
|
|
537
|
+
- SHA-256 hash-based lookup
|
|
538
|
+
|
|
539
|
+
2. **Semantic Cache** - Similarity matching (95% threshold)
|
|
540
|
+
- Matches semantically similar prompts
|
|
541
|
+
- Vector embedding-based similarity search
|
|
542
|
+
- Available on paid plans
|
|
543
|
+
|
|
544
|
+
### Cost Savings Tracking
|
|
545
|
+
|
|
546
|
+
Every response includes savings metrics:
|
|
547
|
+
|
|
548
|
+
```typescript
|
|
549
|
+
const response = await client.chat.create({
|
|
550
|
+
model: 'llama2',
|
|
551
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
552
|
+
});
|
|
553
|
+
|
|
554
|
+
if (response.nova?.cacheHit) {
|
|
555
|
+
console.log(`Saved ${response.nova.tokensSaved} tokens`);
|
|
556
|
+
console.log(`Cache layer: ${response.nova.cacheLayer}`);
|
|
557
|
+
}
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
### Model Routing
|
|
561
|
+
|
|
562
|
+
Nova automatically routes requests to the correct provider based on:
|
|
563
|
+
- Model identifier in request
|
|
564
|
+
- Project default model configuration
|
|
565
|
+
- Route configuration (if specified)
|
|
566
|
+
|
|
567
|
+
```typescript
|
|
568
|
+
// Uses project default if model not specified
|
|
569
|
+
const response = await client.chat.create({
|
|
570
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
571
|
+
// model is optional - uses project default
|
|
572
|
+
});
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
### RAG (Retrieval-Augmented Generation)
|
|
576
|
+
|
|
577
|
+
RAG provides **70-90% token savings** by automatically retrieving only relevant document chunks instead of sending entire documents.
|
|
578
|
+
|
|
579
|
+
**How RAG Works:**
|
|
580
|
+
1. **Automatic Context Retrieval**: When you send a chat completion request to a route config that has `ragEnabled: true`, Nova-route automatically:
|
|
581
|
+
- Extracts the query from the last user message
|
|
582
|
+
- Generates a vector embedding of the query
|
|
583
|
+
- Searches Qdrant for semantically similar chunks
|
|
584
|
+
- Selects top-K chunks above similarity threshold
|
|
585
|
+
- Manages token budget to include only chunks that fit
|
|
586
|
+
- Injects retrieved chunks as context before the user's query
|
|
587
|
+
|
|
588
|
+
2. **Token Budget Management**: Nova-route automatically calculates:
|
|
589
|
+
```
|
|
590
|
+
Token Budget = Context Window - Existing Prompt Tokens - Headroom (200 tokens)
|
|
591
|
+
```
|
|
592
|
+
Only chunks that fit within this budget are included.
|
|
593
|
+
|
|
594
|
+
3. **Prompt Format**: The final prompt sent to the AI model includes:
|
|
595
|
+
```
|
|
596
|
+
[System Prompt]
|
|
597
|
+
[Pre-prompt Items]
|
|
598
|
+
|
|
599
|
+
Context:
|
|
600
|
+
[Relevant chunk 1 from your documents]
|
|
601
|
+
[Relevant chunk 2 from your documents]
|
|
602
|
+
|
|
603
|
+
Query: [User's original question]
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+
**RAG Benefits:**
|
|
607
|
+
- **70-90% Token Savings**: Only relevant chunks vs. full documents
|
|
608
|
+
- **Improved Accuracy**: AI responses grounded in your documents
|
|
609
|
+
- **Automatic**: No manual context management needed
|
|
610
|
+
- **Scalable**: Works with large document collections
|
|
611
|
+
- **Intelligent**: Semantic search finds relevant content even with different wording
|
|
612
|
+
|
|
613
|
+
**Example Token Savings:**
|
|
614
|
+
- Full document: 10,000 tokens
|
|
615
|
+
- Relevant chunks: 1,500 tokens
|
|
616
|
+
- **Savings: 8,500 tokens (85%)**
|
|
617
|
+
|
|
618
|
+
**See [RAG SDK Documentation](https://github.com/sayanmohsin/nova-route/blob/main/docs/RAG_SDK.md) for complete setup and configuration details.**
|
|
619
|
+
|
|
620
|
+
## TypeScript Support
|
|
621
|
+
|
|
622
|
+
Full TypeScript support with comprehensive type definitions:
|
|
623
|
+
|
|
624
|
+
```typescript
|
|
625
|
+
import { NovaClient } from '@ancatag/n-r';
|
|
626
|
+
import type {
|
|
627
|
+
ChatCompletionRequest,
|
|
628
|
+
ChatCompletionResponse,
|
|
629
|
+
NovaClientConfig,
|
|
630
|
+
} from '@ancatag/n-r';
|
|
631
|
+
|
|
632
|
+
const config: NovaClientConfig = {
|
|
633
|
+
apiKey: process.env.NOVA_API_KEY!,
|
|
634
|
+
baseUrl: 'https://api.nova.ai',
|
|
635
|
+
};
|
|
636
|
+
|
|
637
|
+
const client = new NovaClient(config);
|
|
638
|
+
|
|
639
|
+
async function chat(
|
|
640
|
+
request: ChatCompletionRequest
|
|
641
|
+
): Promise<ChatCompletionResponse> {
|
|
642
|
+
return await client.chat.create(request);
|
|
643
|
+
}
|
|
644
|
+
```
|
|
645
|
+
|
|
646
|
+
## Requirements
|
|
647
|
+
|
|
648
|
+
- **Node.js**: 18.0.0 or higher (uses native `fetch`)
|
|
649
|
+
- **TypeScript**: 5.0+ (for type definitions, optional)
|
|
650
|
+
|
|
651
|
+
## Getting Started with Nova-route
|
|
652
|
+
|
|
653
|
+
1. **Sign up** at [nova.ai](https://nova.ai) (Free plan available)
|
|
654
|
+
2. **Create a project** in the dashboard
|
|
655
|
+
3. **Configure your models** (BYOP or use hosted providers)
|
|
656
|
+
4. **Generate an API key** (format: `nova_sk_...`)
|
|
657
|
+
5. **Install the SDK** and start saving on token costs!
|
|
658
|
+
|
|
659
|
+
## Migration from OpenAI
|
|
660
|
+
|
|
661
|
+
Switching from OpenAI to Nova-route is simple:
|
|
662
|
+
|
|
663
|
+
```typescript
|
|
664
|
+
// Before (direct OpenAI)
|
|
665
|
+
import OpenAI from 'openai';
|
|
666
|
+
|
|
667
|
+
const client = new OpenAI({
|
|
668
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
669
|
+
});
|
|
670
|
+
|
|
671
|
+
// After (via Nova-route) - Just change the SDK!
|
|
672
|
+
import { NovaClient } from '@ancatag/n-r';
|
|
673
|
+
|
|
674
|
+
const client = new NovaClient({
|
|
675
|
+
apiKey: process.env.NOVA_API_KEY, // Get from Nova-route dashboard
|
|
676
|
+
baseUrl: 'https://api.nova.ai', // Nova-route API endpoint
|
|
677
|
+
});
|
|
678
|
+
|
|
679
|
+
// Your request payload stays the same (note: the method is client.chat.create, not OpenAI's client.chat.completions.create)
|
|
680
|
+
const response = await client.chat.create({
|
|
681
|
+
model: 'gpt-4',
|
|
682
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
683
|
+
});
|
|
684
|
+
```
|
|
685
|
+
|
|
686
|
+
## Documentation
|
|
687
|
+
|
|
688
|
+
- **[Full SDK Documentation](https://github.com/sayanmohsin/nova-route/blob/main/docs/SDK.md)** - Complete SDK reference
|
|
689
|
+
- **[RAG SDK Documentation](https://github.com/sayanmohsin/nova-route/blob/main/docs/RAG_SDK.md)** - RAG setup, document upload, and configuration
|
|
690
|
+
- **[API Documentation](https://github.com/sayanmohsin/nova-route/blob/main/docs/API_DOCUMENTATION.md)** - API endpoints and authentication
|
|
691
|
+
- **[Architecture Guide](https://github.com/sayanmohsin/nova-route/blob/main/docs/ARCHITECTURE.md)** - System design and components
|
|
692
|
+
- **[Getting Started](https://github.com/sayanmohsin/nova-route/blob/main/docs/API_GETTING_STARTED.md)** - Quick start guide
|
|
693
|
+
|
|
694
|
+
## License
|
|
695
|
+
|
|
696
|
+
ISC
|
|
697
|
+
|
|
698
|
+
## Support
|
|
699
|
+
|
|
700
|
+
- **Documentation**: [docs/](https://github.com/sayanmohsin/nova-route/tree/main/docs)
|
|
701
|
+
- **Issues**: [GitHub Issues](https://github.com/sayanmohsin/nova-route/issues)
|
|
702
|
+
- **Dashboard**: [nova.ai](https://nova.ai)
|
|
703
|
+
|
|
704
|
+
---
|
|
705
|
+
|
|
706
|
+
**Built with ❤️ by the Nova-route team**
|