@dooor-ai/cortexdb 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +137 -60
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
# CortexDB TypeScript SDK
|
|
2
2
|
|
|
3
|
-
Official TypeScript/JavaScript client for CortexDB
|
|
3
|
+
Official TypeScript/JavaScript client for CortexDB.
|
|
4
|
+
|
|
5
|
+
## What is CortexDB?
|
|
6
|
+
|
|
7
|
+
CortexDB is a multi-modal RAG (Retrieval-Augmented Generation) platform that combines traditional database capabilities with vector search and advanced document processing. It enables you to:
|
|
8
|
+
|
|
9
|
+
- Store structured and unstructured data in a unified database
|
|
10
|
+
- Automatically extract text from documents (PDF, DOCX, XLSX) using Docling
|
|
11
|
+
- Generate embeddings for semantic search using various providers (OpenAI, Gemini, etc.)
|
|
12
|
+
- Perform hybrid search combining filters with vector similarity
|
|
13
|
+
- Build RAG applications with automatic chunking and vectorization
|
|
14
|
+
|
|
15
|
+
CortexDB handles the complex infrastructure of vector databases (Qdrant), object storage (MinIO), and traditional databases (PostgreSQL) behind a simple API.
|
|
4
16
|
|
|
5
17
|
## Features
|
|
6
18
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
19
|
+
- **Multi-modal document processing**: Upload PDF, DOCX, and XLSX files and automatically extract their text, with OCR as a fallback
|
|
20
|
+
- **Semantic search**: Vector-based search using embeddings from OpenAI, Gemini, or custom providers
|
|
21
|
+
- **Automatic chunking**: Smart text splitting optimized for RAG applications
|
|
22
|
+
- **Flexible schema**: Define collections with typed fields (string, number, boolean, file, array)
|
|
23
|
+
- **Hybrid queries**: Combine exact filters with semantic search
|
|
24
|
+
- **Storage control**: Choose where each field is stored (PostgreSQL, Qdrant, MinIO)
|
|
25
|
+
- **Type-safe**: Full TypeScript support with comprehensive type definitions
|
|
26
|
+
- **Modern API**: Promise-based API designed for async/await, built on native `fetch` (requires Node.js 18+)
|
|
13
27
|
|
|
14
28
|
## Installation
|
|
15
29
|
|
|
@@ -39,28 +53,34 @@ async function main() {
|
|
|
39
53
|
baseUrl: 'http://localhost:8000'
|
|
40
54
|
});
|
|
41
55
|
|
|
42
|
-
// Create a collection
|
|
43
|
-
await client.collections.create(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
56
|
+
// Create a collection with vectorization enabled
|
|
57
|
+
await client.collections.create(
|
|
58
|
+
'documents',
|
|
59
|
+
[
|
|
60
|
+
{ name: 'title', type: FieldType.STRING },
|
|
61
|
+
{ name: 'content', type: FieldType.STRING, vectorize: true }
|
|
62
|
+
],
|
|
63
|
+
'your-embedding-provider-id' // Required when any field sets vectorize: true
|
|
64
|
+
);
|
|
47
65
|
|
|
48
66
|
// Create a record
|
|
49
67
|
const record = await client.records.create('documents', {
|
|
50
|
-
title: '
|
|
51
|
-
content: '
|
|
68
|
+
title: 'Introduction to AI',
|
|
69
|
+
content: 'Artificial intelligence is transforming how we build software...'
|
|
52
70
|
});
|
|
53
71
|
|
|
54
|
-
// Semantic search
|
|
72
|
+
// Semantic search - finds relevant content by meaning, not just keywords
|
|
55
73
|
const results = await client.records.search(
|
|
56
74
|
'documents',
|
|
57
|
-
'
|
|
75
|
+
'How is AI changing software development?',
|
|
58
76
|
undefined,
|
|
59
77
|
10
|
|
60
78
|
);
|
|
61
79
|
|
|
62
80
|
results.results.forEach(result => {
|
|
63
|
-
console.log(`Score: ${result.score.toFixed(4)}
|
|
81
|
+
console.log(`Score: ${result.score.toFixed(4)}`);
|
|
82
|
+
console.log(`Title: ${result.record.data.title}`);
|
|
83
|
+
console.log(`Content: ${result.record.data.content}\n`);
|
|
64
84
|
});
|
|
65
85
|
|
|
66
86
|
await client.close();
|
|
@@ -81,7 +101,7 @@ const client = new CortexClient({
|
|
|
81
101
|
baseUrl: 'http://localhost:8000'
|
|
82
102
|
});
|
|
83
103
|
|
|
84
|
-
//
|
|
104
|
+
// Production with API key
|
|
85
105
|
const client = new CortexClient({
|
|
86
106
|
baseUrl: 'https://api.cortexdb.com',
|
|
87
107
|
apiKey: 'your-api-key'
|
|
@@ -96,44 +116,52 @@ const client = new CortexClient({
|
|
|
96
116
|
|
|
97
117
|
### Collections
|
|
98
118
|
|
|
119
|
+
Collections define the schema for your data. Each collection can have multiple fields with different types and storage options.
|
|
120
|
+
|
|
99
121
|
```typescript
|
|
100
122
|
import { FieldType, StoreLocation } from '@dooor-ai/cortexdb';
|
|
101
123
|
|
|
102
|
-
// Create collection
|
|
103
|
-
const collection = await client.collections.create(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
]
|
|
124
|
+
// Create collection with vectorization
|
|
125
|
+
const collection = await client.collections.create(
|
|
126
|
+
'articles',
|
|
127
|
+
[
|
|
128
|
+
{
|
|
129
|
+
name: 'title',
|
|
130
|
+
type: FieldType.STRING
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: 'content',
|
|
134
|
+
type: FieldType.STRING,
|
|
135
|
+
vectorize: true // Enable semantic search on this field
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
name: 'year',
|
|
139
|
+
type: FieldType.NUMBER,
|
|
140
|
+
store_in: [StoreLocation.POSTGRES, StoreLocation.QDRANT_PAYLOAD]
|
|
141
|
+
}
|
|
142
|
+
],
|
|
143
|
+
'embedding-provider-id' // Required when any field has vectorize=true
|
|
144
|
+
);
|
|
119
145
|
|
|
120
146
|
// List collections
|
|
121
147
|
const collections = await client.collections.list();
|
|
122
148
|
|
|
123
|
-
// Get collection
|
|
149
|
+
// Get collection schema
|
|
124
150
|
const schema = await client.collections.get('articles');
|
|
125
151
|
|
|
126
|
-
// Delete collection
|
|
152
|
+
// Delete collection and all its records
|
|
127
153
|
await client.collections.delete('articles');
|
|
128
154
|
```
|
|
129
155
|
|
|
130
156
|
### Records
|
|
131
157
|
|
|
158
|
+
Records are the actual data stored in collections. They must match the collection schema.
|
|
159
|
+
|
|
132
160
|
```typescript
|
|
133
161
|
// Create record
|
|
134
162
|
const record = await client.records.create('articles', {
|
|
135
163
|
title: 'Machine Learning Basics',
|
|
136
|
-
content: '
|
|
164
|
+
content: 'Machine learning is a subset of AI that focuses on learning from data...',
|
|
137
165
|
year: 2024
|
|
138
166
|
});
|
|
139
167
|
|
|
@@ -148,7 +176,7 @@ const updated = await client.records.update('articles', record.id, {
|
|
|
148
176
|
// Delete record
|
|
149
177
|
await client.records.delete('articles', record.id);
|
|
150
178
|
|
|
151
|
-
// List records
|
|
179
|
+
// List records with pagination
|
|
152
180
|
const results = await client.records.list('articles', {
|
|
153
181
|
limit: 10,
|
|
154
182
|
offset: 0
|
|
@@ -157,8 +185,10 @@ const results = await client.records.list('articles', {
|
|
|
157
185
|
|
|
158
186
|
### Semantic Search
|
|
159
187
|
|
|
188
|
+
Semantic search finds records by meaning, not just exact keyword matches. It uses vector embeddings to understand context.
|
|
189
|
+
|
|
160
190
|
```typescript
|
|
161
|
-
// Basic search
|
|
191
|
+
// Basic semantic search
|
|
162
192
|
const results = await client.records.search(
|
|
163
193
|
'articles',
|
|
164
194
|
'machine learning fundamentals',
|
|
@@ -166,7 +196,7 @@ const results = await client.records.search(
|
|
|
166
196
|
10
|
|
167
197
|
);
|
|
168
198
|
|
|
169
|
-
// Search with filters
|
|
199
|
+
// Search with filters - combine semantic search with exact matches
|
|
170
200
|
const filteredResults = await client.records.search(
|
|
171
201
|
'articles',
|
|
172
202
|
'neural networks',
|
|
@@ -177,30 +207,55 @@ const filteredResults = await client.records.search(
|
|
|
177
207
|
5
|
|
178
208
|
);
|
|
179
209
|
|
|
180
|
-
// Process results
|
|
210
|
+
// Process results - ordered by relevance score
|
|
181
211
|
filteredResults.results.forEach(result => {
|
|
182
|
-
console.log(`Score: ${result.score.toFixed(4)}`);
|
|
212
|
+
console.log(`Score: ${result.score.toFixed(4)}`); // Higher = more relevant
|
|
183
213
|
console.log(`Title: ${result.record.data.title}`);
|
|
184
214
|
console.log(`Year: ${result.record.data.year}`);
|
|
185
215
|
});
|
|
186
216
|
```
|
|
187
217
|
|
|
218
|
+
### Working with Files
|
|
219
|
+
|
|
220
|
+
CortexDB can process documents and automatically extract text for vectorization.
|
|
221
|
+
|
|
222
|
+
```typescript
|
|
223
|
+
// Create collection with file field
|
|
224
|
+
await client.collections.create(
|
|
225
|
+
'documents',
|
|
226
|
+
[
|
|
227
|
+
{ name: 'title', type: FieldType.STRING },
|
|
228
|
+
{
|
|
229
|
+
name: 'document',
|
|
230
|
+
type: FieldType.FILE,
|
|
231
|
+
vectorize: true // Extract text and create embeddings
|
|
232
|
+
}
|
|
233
|
+
],
|
|
234
|
+
'embedding-provider-id'
|
|
235
|
+
);
|
|
236
|
+
|
|
237
|
+
// Note: File upload support is currently available in the REST API
|
|
238
|
+
// File upload support in the TypeScript SDK will be added in a future version
|
|
239
|
+
```
|
|
240
|
+
|
|
188
241
|
### Filter Operators
|
|
189
242
|
|
|
190
243
|
```typescript
|
|
191
|
-
// Exact match
|
|
244
|
+
// Exact match filters
|
|
192
245
|
const results = await client.records.list('articles', {
|
|
193
246
|
filters: {
|
|
194
247
|
category: 'technology',
|
|
195
|
-
published: true
|
|
248
|
+
published: true,
|
|
249
|
+
year: 2024
|
|
196
250
|
}
|
|
197
251
|
});
|
|
198
252
|
|
|
199
|
-
// Combine filters
|
|
253
|
+
// Combine multiple filters
|
|
200
254
|
const filtered = await client.records.list('articles', {
|
|
201
255
|
filters: {
|
|
202
256
|
year: 2024,
|
|
203
|
-
category: 'AI'
|
|
257
|
+
category: 'AI',
|
|
258
|
+
author: 'John Doe'
|
|
204
259
|
},
|
|
205
260
|
limit: 20
|
|
206
261
|
});
|
|
@@ -208,12 +263,15 @@ const filtered = await client.records.list('articles', {
|
|
|
208
263
|
|
|
209
264
|
## Error Handling
|
|
210
265
|
|
|
266
|
+
The SDK provides specific error types for different failure scenarios.
|
|
267
|
+
|
|
211
268
|
```typescript
|
|
212
269
|
import {
|
|
213
270
|
CortexDBError,
|
|
214
271
|
CortexDBNotFoundError,
|
|
215
272
|
CortexDBValidationError,
|
|
216
|
-
CortexDBConnectionError
|
|
273
|
+
CortexDBConnectionError,
|
|
274
|
+
CortexDBTimeoutError
|
|
217
275
|
} from '@dooor-ai/cortexdb';
|
|
218
276
|
|
|
219
277
|
try {
|
|
@@ -222,9 +280,11 @@ try {
|
|
|
222
280
|
if (error instanceof CortexDBNotFoundError) {
|
|
223
281
|
console.log('Record not found');
|
|
224
282
|
} else if (error instanceof CortexDBValidationError) {
|
|
225
|
-
console.log('
|
|
283
|
+
console.log('Invalid data:', error.message);
|
|
226
284
|
} else if (error instanceof CortexDBConnectionError) {
|
|
227
285
|
console.log('Connection failed:', error.message);
|
|
286
|
+
} else if (error instanceof CortexDBTimeoutError) {
|
|
287
|
+
console.log('Request timed out:', error.message);
|
|
228
288
|
} else if (error instanceof CortexDBError) {
|
|
229
289
|
console.log('General error:', error.message);
|
|
230
290
|
}
|
|
@@ -233,11 +293,11 @@ try {
|
|
|
233
293
|
|
|
234
294
|
## Examples
|
|
235
295
|
|
|
236
|
-
Check the [`examples/`](./examples) directory for
|
|
296
|
+
Check the [`examples/`](./examples) directory for complete working examples:
|
|
237
297
|
|
|
238
|
-
- [`quickstart.ts`](./examples/quickstart.ts) -
|
|
239
|
-
- [`search.ts`](./examples/search.ts) - Semantic search with filters
|
|
240
|
-
- [`basic.ts`](./examples/basic.ts) - Basic operations
|
|
298
|
+
- [`quickstart.ts`](./examples/quickstart.ts) - Complete walkthrough of SDK features
|
|
299
|
+
- [`search.ts`](./examples/search.ts) - Semantic search with filters and providers
|
|
300
|
+
- [`basic.ts`](./examples/basic.ts) - Basic CRUD operations
|
|
241
301
|
|
|
242
302
|
Run examples:
|
|
243
303
|
|
|
@@ -264,27 +324,44 @@ npm run build
|
|
|
264
324
|
### Scripts
|
|
265
325
|
|
|
266
326
|
```bash
|
|
267
|
-
# Build
|
|
327
|
+
# Build TypeScript
|
|
268
328
|
npm run build
|
|
269
329
|
|
|
270
|
-
#
|
|
330
|
+
# Build in watch mode
|
|
271
331
|
npm run build:watch
|
|
272
332
|
|
|
273
|
-
# Clean
|
|
333
|
+
# Clean build artifacts
|
|
274
334
|
npm run clean
|
|
275
335
|
|
|
276
|
-
# Lint
|
|
336
|
+
# Lint code
|
|
277
337
|
npm run lint
|
|
278
338
|
|
|
279
|
-
# Format
|
|
339
|
+
# Format code
|
|
280
340
|
npm run format
|
|
281
341
|
```
|
|
282
342
|
|
|
283
343
|
## Requirements
|
|
284
344
|
|
|
285
|
-
- Node.js >= 18.0.0
|
|
286
|
-
- CortexDB gateway running
|
|
345
|
+
- Node.js >= 18.0.0 (for native fetch support)
|
|
346
|
+
- CortexDB gateway running locally or remotely
|
|
347
|
+
- A configured embedding provider (OpenAI, Gemini, etc.) if you use vectorization
|
|
348
|
+
|
|
349
|
+
## Architecture
|
|
350
|
+
|
|
351
|
+
CortexDB integrates multiple technologies:
|
|
352
|
+
|
|
353
|
+
- **PostgreSQL**: Stores structured data and metadata
|
|
354
|
+
- **Qdrant**: Vector database for semantic search
|
|
355
|
+
- **MinIO**: Object storage for files
|
|
356
|
+
- **Docling**: Advanced document processing and text extraction
|
|
357
|
+
|
|
358
|
+
The SDK abstracts this complexity into a simple, unified API.
|
|
287
359
|
|
|
288
360
|
## License
|
|
289
361
|
|
|
290
362
|
MIT License - see [LICENSE](./LICENSE) for details.
|
|
363
|
+
|
|
364
|
+
## Related
|
|
365
|
+
|
|
366
|
+
- [CortexDB Python SDK](../python) - Python client for CortexDB
|
|
367
|
+
- [CortexDB Documentation](../../docs) - Complete platform documentation
|
package/package.json
CHANGED