@dooor-ai/cortexdb 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +238 -173
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,34 +1,47 @@
|
|
|
1
|
-
# CortexDB TypeScript
|
|
1
|
+
# CortexDB TypeScript SDK
|
|
2
2
|
|
|
3
|
-
Official TypeScript/JavaScript client for
|
|
3
|
+
Official TypeScript/JavaScript client for CortexDB.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
## What is CortexDB?
|
|
6
|
+
|
|
7
|
+
CortexDB is a multi-modal RAG (Retrieval-Augmented Generation) platform that combines traditional database capabilities with vector search and advanced document processing. It enables you to:
|
|
8
|
+
|
|
9
|
+
- Store structured and unstructured data in a unified database
|
|
10
|
+
- Automatically extract text from documents (PDF, DOCX, XLSX) using Docling
|
|
11
|
+
- Generate embeddings for semantic search using various providers (OpenAI, Gemini, etc.)
|
|
12
|
+
- Perform hybrid search combining filters with vector similarity
|
|
13
|
+
- Build RAG applications with automatic chunking and vectorization
|
|
14
|
+
|
|
15
|
+
CortexDB handles the complex infrastructure of vector databases (Qdrant), object storage (MinIO), and traditional databases (PostgreSQL) behind a simple API.
|
|
7
16
|
|
|
8
17
|
## Features
|
|
9
18
|
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
19
|
+
- **Multi-modal document processing**: Upload PDFs, DOCX, XLSX files and automatically extract text with OCR fallback
|
|
20
|
+
- **Semantic search**: Vector-based search using embeddings from OpenAI, Gemini, or custom providers
|
|
21
|
+
- **Automatic chunking**: Smart text splitting optimized for RAG applications
|
|
22
|
+
- **Flexible schema**: Define collections with typed fields (string, number, boolean, file, array)
|
|
23
|
+
- **Hybrid queries**: Combine exact filters with semantic search
|
|
24
|
+
- **Storage control**: Choose where each field is stored (PostgreSQL, Qdrant, MinIO)
|
|
25
|
+
- **Type-safe**: Full TypeScript support with comprehensive type definitions
|
|
26
|
+
- **Modern API**: Async/await using native fetch (Node.js 18+)
|
|
17
27
|
|
|
18
28
|
## Installation
|
|
19
29
|
|
|
20
30
|
```bash
|
|
21
31
|
npm install @dooor-ai/cortexdb
|
|
22
|
-
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Or with yarn:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
23
37
|
yarn add @dooor-ai/cortexdb
|
|
24
|
-
# or
|
|
25
|
-
pnpm add @dooor-ai/cortexdb
|
|
26
38
|
```
|
|
27
39
|
|
|
28
|
-
|
|
40
|
+
Or with pnpm:
|
|
29
41
|
|
|
30
|
-
|
|
31
|
-
|
|
42
|
+
```bash
|
|
43
|
+
pnpm add @dooor-ai/cortexdb
|
|
44
|
+
```
|
|
32
45
|
|
|
33
46
|
## Quick Start
|
|
34
47
|
|
|
@@ -36,267 +49,319 @@ pnpm add @dooor-ai/cortexdb
|
|
|
36
49
|
import { CortexClient, FieldType } from '@dooor-ai/cortexdb';
|
|
37
50
|
|
|
38
51
|
async function main() {
|
|
39
|
-
// Initialize client
|
|
40
52
|
const client = new CortexClient({
|
|
41
|
-
baseUrl: 'http://localhost:8000'
|
|
42
|
-
// apiKey: 'YOUR_API_KEY', // Optional: if authentication is enabled
|
|
53
|
+
baseUrl: 'http://localhost:8000'
|
|
43
54
|
});
|
|
44
55
|
|
|
45
|
-
//
|
|
46
|
-
const isHealthy = await client.healthcheck();
|
|
47
|
-
console.log('CortexDB:', isHealthy ? 'Connected ✓' : 'Disconnected ✗');
|
|
48
|
-
|
|
49
|
-
// Create a collection
|
|
50
|
-
await client.collections.create('my_docs', [
|
|
51
|
-
{ name: 'title', type: FieldType.STRING },
|
|
52
|
-
{ name: 'content', type: FieldType.STRING },
|
|
53
|
-
]);
|
|
54
|
-
|
|
55
|
-
// Create a record
|
|
56
|
-
const record = await client.records.create('my_docs', {
|
|
57
|
-
title: 'Hello World',
|
|
58
|
-
content: 'This is my first document',
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
console.log('Created:', record.id);
|
|
62
|
-
|
|
63
|
-
// Get the record
|
|
64
|
-
const fetched = await client.records.get('my_docs', record.id);
|
|
65
|
-
console.log('Fetched:', fetched.data);
|
|
66
|
-
|
|
67
|
-
// List all records
|
|
68
|
-
const results = await client.records.list('my_docs');
|
|
69
|
-
console.log('Total:', results.total);
|
|
70
|
-
|
|
71
|
-
// Clean up
|
|
72
|
-
await client.collections.delete('my_docs');
|
|
73
|
-
await client.close();
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
main();
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
## Semantic Search Example
|
|
80
|
-
|
|
81
|
-
Enable semantic search by creating a collection with `vectorize: true` and an embedding provider:
|
|
82
|
-
|
|
83
|
-
```typescript
|
|
84
|
-
import { CortexClient, FieldType } from '@dooor-ai/cortexdb';
|
|
85
|
-
|
|
86
|
-
async function semanticSearch() {
|
|
87
|
-
const client = new CortexClient({ baseUrl: 'http://localhost:8000' });
|
|
88
|
-
|
|
89
|
-
// Create collection with vectorization
|
|
56
|
+
// Create a collection with vectorization enabled
|
|
90
57
|
await client.collections.create(
|
|
91
|
-
'
|
|
58
|
+
'documents',
|
|
92
59
|
[
|
|
93
60
|
{ name: 'title', type: FieldType.STRING },
|
|
94
|
-
{ name: 'content', type: FieldType.STRING, vectorize: true }
|
|
61
|
+
{ name: 'content', type: FieldType.STRING, vectorize: true }
|
|
95
62
|
],
|
|
96
|
-
'your-embedding-provider-id'
|
|
63
|
+
'your-embedding-provider-id' // Required when vectorize=true
|
|
97
64
|
);
|
|
98
65
|
|
|
99
|
-
//
|
|
100
|
-
await client.records.create('
|
|
101
|
-
title: '
|
|
102
|
-
content: '
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
await client.records.create('knowledge_base', {
|
|
106
|
-
title: 'Deep Learning',
|
|
107
|
-
content: 'Deep learning uses neural networks with multiple layers.',
|
|
66
|
+
// Create a record
|
|
67
|
+
const record = await client.records.create('documents', {
|
|
68
|
+
title: 'Introduction to AI',
|
|
69
|
+
content: 'Artificial intelligence is transforming how we build software...'
|
|
108
70
|
});
|
|
109
71
|
|
|
110
|
-
//
|
|
72
|
+
// Semantic search - finds relevant content by meaning, not just keywords
|
|
111
73
|
const results = await client.records.search(
|
|
112
|
-
'
|
|
113
|
-
'
|
|
114
|
-
undefined,
|
|
115
|
-
|
|
74
|
+
'documents',
|
|
75
|
+
'How is AI changing software development?',
|
|
76
|
+
undefined,
|
|
77
|
+
10
|
|
116
78
|
);
|
|
117
79
|
|
|
118
|
-
results.results.forEach(
|
|
119
|
-
console.log(
|
|
80
|
+
results.results.forEach(result => {
|
|
81
|
+
console.log(`Score: ${result.score.toFixed(4)}`);
|
|
82
|
+
console.log(`Title: ${result.record.data.title}`);
|
|
83
|
+
console.log(`Content: ${result.record.data.content}\n`);
|
|
120
84
|
});
|
|
121
85
|
|
|
122
86
|
await client.close();
|
|
123
87
|
}
|
|
88
|
+
|
|
89
|
+
main();
|
|
124
90
|
```
|
|
125
91
|
|
|
126
|
-
##
|
|
92
|
+
## Usage
|
|
127
93
|
|
|
128
|
-
###
|
|
94
|
+
### Initialize Client
|
|
129
95
|
|
|
130
96
|
```typescript
|
|
97
|
+
import { CortexClient } from '@dooor-ai/cortexdb';
|
|
98
|
+
|
|
99
|
+
// Local development
|
|
131
100
|
const client = new CortexClient({
|
|
132
|
-
baseUrl
|
|
133
|
-
apiKey?: string; // Optional API key
|
|
134
|
-
timeout?: number; // Request timeout in ms (default: 30000)
|
|
101
|
+
baseUrl: 'http://localhost:8000'
|
|
135
102
|
});
|
|
136
|
-
```
|
|
137
103
|
|
|
138
|
-
|
|
104
|
+
// Production with API key
|
|
105
|
+
const productionClient = new CortexClient({
|
|
106
|
+
baseUrl: 'https://api.cortexdb.com',
|
|
107
|
+
apiKey: 'your-api-key'
|
|
108
|
+
});
|
|
139
109
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
110
|
+
// Custom timeout
|
|
111
|
+
const clientWithTimeout = new CortexClient({
|
|
112
|
+
baseUrl: 'http://localhost:8000',
|
|
113
|
+
timeout: 60000 // 60 seconds
|
|
114
|
+
});
|
|
115
|
+
```
|
|
143
116
|
|
|
144
|
-
### Collections
|
|
117
|
+
### Collections
|
|
145
118
|
|
|
146
|
-
|
|
147
|
-
// List all collections
|
|
148
|
-
const collections = await client.collections.list();
|
|
119
|
+
Collections define the schema for your data. Each collection can have multiple fields with different types and storage options.
|
|
149
120
|
|
|
150
|
-
|
|
151
|
-
|
|
121
|
+
```typescript
|
|
122
|
+
import { FieldType, StoreLocation } from '@dooor-ai/cortexdb';
|
|
152
123
|
|
|
153
|
-
// Create
|
|
154
|
-
await client.collections.create(
|
|
155
|
-
'
|
|
124
|
+
// Create collection with vectorization
|
|
125
|
+
const collection = await client.collections.create(
|
|
126
|
+
'articles',
|
|
156
127
|
[
|
|
157
|
-
{
|
|
158
|
-
|
|
128
|
+
{
|
|
129
|
+
name: 'title',
|
|
130
|
+
type: FieldType.STRING
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: 'content',
|
|
134
|
+
type: FieldType.STRING,
|
|
135
|
+
vectorize: true // Enable semantic search on this field
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
name: 'year',
|
|
139
|
+
type: FieldType.NUMBER,
|
|
140
|
+
store_in: [StoreLocation.POSTGRES, StoreLocation.QDRANT_PAYLOAD]
|
|
141
|
+
}
|
|
159
142
|
],
|
|
160
|
-
'embedding-provider-id'
|
|
143
|
+
'embedding-provider-id' // Required when any field has vectorize=true
|
|
161
144
|
);
|
|
162
145
|
|
|
163
|
-
//
|
|
164
|
-
await client.collections.
|
|
146
|
+
// List collections
|
|
147
|
+
const collections = await client.collections.list();
|
|
148
|
+
|
|
149
|
+
// Get collection schema
|
|
150
|
+
const schema = await client.collections.get('articles');
|
|
165
151
|
|
|
166
|
-
// Delete
|
|
167
|
-
await client.collections.delete('
|
|
152
|
+
// Delete collection and all its records
|
|
153
|
+
await client.collections.delete('articles');
|
|
168
154
|
```
|
|
169
155
|
|
|
170
|
-
### Records
|
|
156
|
+
### Records
|
|
157
|
+
|
|
158
|
+
Records are the actual data stored in collections. They must match the collection schema.
|
|
171
159
|
|
|
172
160
|
```typescript
|
|
173
|
-
// Create
|
|
174
|
-
const record = await client.records.create('
|
|
175
|
-
title: '
|
|
176
|
-
content: '
|
|
161
|
+
// Create record
|
|
162
|
+
const record = await client.records.create('articles', {
|
|
163
|
+
title: 'Machine Learning Basics',
|
|
164
|
+
content: 'Machine learning is a subset of AI that focuses on learning from data...',
|
|
165
|
+
year: 2024
|
|
177
166
|
});
|
|
178
167
|
|
|
179
|
-
// Get
|
|
180
|
-
const
|
|
168
|
+
// Get record by ID
|
|
169
|
+
const fetched = await client.records.get('articles', record.id);
|
|
181
170
|
|
|
182
|
-
//
|
|
183
|
-
const
|
|
184
|
-
|
|
185
|
-
limit: 10,
|
|
186
|
-
offset: 0,
|
|
171
|
+
// Update record
|
|
172
|
+
const updated = await client.records.update('articles', record.id, {
|
|
173
|
+
year: 2025
|
|
187
174
|
});
|
|
188
175
|
|
|
189
|
-
//
|
|
190
|
-
await client.records.
|
|
191
|
-
|
|
176
|
+
// Delete record
|
|
177
|
+
await client.records.delete('articles', record.id);
|
|
178
|
+
|
|
179
|
+
// List records with pagination
|
|
180
|
+
const results = await client.records.list('articles', {
|
|
181
|
+
limit: 10,
|
|
182
|
+
offset: 0
|
|
192
183
|
});
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Semantic Search
|
|
193
187
|
|
|
194
|
-
|
|
195
|
-
await client.records.delete('collection_name', 'record-id');
|
|
188
|
+
Semantic search finds records by meaning, not just exact keyword matches. It uses vector embeddings to understand context.
|
|
196
189
|
|
|
197
|
-
|
|
190
|
+
```typescript
|
|
191
|
+
// Basic semantic search
|
|
198
192
|
const results = await client.records.search(
|
|
199
|
-
'
|
|
200
|
-
'
|
|
201
|
-
|
|
202
|
-
10
|
|
193
|
+
'articles',
|
|
194
|
+
'machine learning fundamentals',
|
|
195
|
+
undefined,
|
|
196
|
+
10
|
|
203
197
|
);
|
|
198
|
+
|
|
199
|
+
// Search with filters - combine semantic search with exact matches
|
|
200
|
+
const filteredResults = await client.records.search(
|
|
201
|
+
'articles',
|
|
202
|
+
'neural networks',
|
|
203
|
+
{
|
|
204
|
+
year: 2024,
|
|
205
|
+
category: 'AI'
|
|
206
|
+
},
|
|
207
|
+
5
|
|
208
|
+
);
|
|
209
|
+
|
|
210
|
+
// Process results - ordered by relevance score
|
|
211
|
+
filteredResults.results.forEach(result => {
|
|
212
|
+
console.log(`Score: ${result.score.toFixed(4)}`); // Higher = more relevant
|
|
213
|
+
console.log(`Title: ${result.record.data.title}`);
|
|
214
|
+
console.log(`Year: ${result.record.data.year}`);
|
|
215
|
+
});
|
|
204
216
|
```
|
|
205
217
|
|
|
206
|
-
|
|
218
|
+
### Working with Files
|
|
219
|
+
|
|
220
|
+
CortexDB can process documents and automatically extract text for vectorization.
|
|
207
221
|
|
|
208
222
|
```typescript
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
223
|
+
// Create collection with file field
|
|
224
|
+
await client.collections.create(
|
|
225
|
+
'documents',
|
|
226
|
+
[
|
|
227
|
+
{ name: 'title', type: FieldType.STRING },
|
|
228
|
+
{
|
|
229
|
+
name: 'document',
|
|
230
|
+
type: FieldType.FILE,
|
|
231
|
+
vectorize: true // Extract text and create embeddings
|
|
232
|
+
}
|
|
219
233
|
],
|
|
220
|
-
|
|
234
|
+
'embedding-provider-id'
|
|
235
|
+
);
|
|
236
|
+
|
|
237
|
+
// Note: File upload is currently supported only through the REST API.
|
|
238
|
+
// File upload support for the TypeScript SDK will be added in a future version.
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Filter Operators
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
// Exact match filters
|
|
245
|
+
const results = await client.records.list('articles', {
|
|
246
|
+
filters: {
|
|
247
|
+
category: 'technology',
|
|
248
|
+
published: true,
|
|
249
|
+
year: 2024
|
|
250
|
+
}
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
// Combine multiple filters
|
|
254
|
+
const filtered = await client.records.list('articles', {
|
|
255
|
+
filters: {
|
|
256
|
+
year: 2024,
|
|
257
|
+
category: 'AI',
|
|
258
|
+
author: 'John Doe'
|
|
259
|
+
},
|
|
260
|
+
limit: 20
|
|
261
|
+
});
|
|
221
262
|
```
|
|
222
263
|
|
|
223
264
|
## Error Handling
|
|
224
265
|
|
|
225
|
-
The SDK provides specific error types for
|
|
266
|
+
The SDK provides specific error types for different failure scenarios.
|
|
226
267
|
|
|
227
268
|
```typescript
|
|
228
269
|
import {
|
|
229
270
|
CortexDBError,
|
|
230
|
-
CortexDBConnectionError,
|
|
231
|
-
CortexDBTimeoutError,
|
|
232
271
|
CortexDBNotFoundError,
|
|
233
272
|
CortexDBValidationError,
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
} from '@cortexdb/sdk';
|
|
273
|
+
CortexDBConnectionError,
|
|
274
|
+
CortexDBTimeoutError
|
|
275
|
+
} from '@dooor-ai/cortexdb';
|
|
238
276
|
|
|
239
277
|
try {
|
|
240
|
-
await client.records.get('
|
|
278
|
+
const record = await client.records.get('articles', 'invalid-id');
|
|
241
279
|
} catch (error) {
|
|
242
280
|
if (error instanceof CortexDBNotFoundError) {
|
|
243
281
|
console.log('Record not found');
|
|
282
|
+
} else if (error instanceof CortexDBValidationError) {
|
|
283
|
+
console.log('Invalid data:', error.message);
|
|
244
284
|
} else if (error instanceof CortexDBConnectionError) {
|
|
245
|
-
console.log('Connection failed');
|
|
285
|
+
console.log('Connection failed:', error.message);
|
|
246
286
|
} else if (error instanceof CortexDBTimeoutError) {
|
|
247
|
-
console.log('Request timed out');
|
|
248
|
-
} else {
|
|
249
|
-
console.log('
|
|
287
|
+
console.log('Request timed out:', error.message);
|
|
288
|
+
} else if (error instanceof CortexDBError) {
|
|
289
|
+
console.log('General error:', error.message);
|
|
250
290
|
}
|
|
251
291
|
}
|
|
252
292
|
```
|
|
253
293
|
|
|
254
294
|
## Examples
|
|
255
295
|
|
|
256
|
-
Check
|
|
296
|
+
Check the [`examples/`](./examples) directory for complete working examples:
|
|
257
297
|
|
|
258
|
-
- [quickstart.ts](./examples/quickstart.ts) - Complete
|
|
259
|
-
- [search.ts](./examples/search.ts) - Semantic search with filters
|
|
260
|
-
- [basic.ts](./examples/basic.ts) - Basic operations
|
|
298
|
+
- [`quickstart.ts`](./examples/quickstart.ts) - Complete walkthrough of SDK features
|
|
299
|
+
- [`search.ts`](./examples/search.ts) - Semantic search with filters and providers
|
|
300
|
+
- [`basic.ts`](./examples/basic.ts) - Basic CRUD operations
|
|
261
301
|
|
|
262
302
|
Run examples:
|
|
263
303
|
|
|
264
304
|
```bash
|
|
265
|
-
|
|
266
|
-
npx ts-node -O '{"module":"commonjs"}' quickstart.ts
|
|
305
|
+
npx ts-node -O '{"module":"commonjs"}' examples/quickstart.ts
|
|
267
306
|
```
|
|
268
307
|
|
|
269
308
|
## Development
|
|
270
309
|
|
|
310
|
+
### Setup
|
|
311
|
+
|
|
271
312
|
```bash
|
|
313
|
+
# Clone repository
|
|
314
|
+
git clone https://github.com/yourusername/cortexdb
|
|
315
|
+
cd cortexdb/clients/typescript
|
|
316
|
+
|
|
272
317
|
# Install dependencies
|
|
273
318
|
npm install
|
|
274
319
|
|
|
275
320
|
# Build
|
|
276
321
|
npm run build
|
|
322
|
+
```
|
|
277
323
|
|
|
278
|
-
|
|
324
|
+
### Scripts
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
# Build TypeScript
|
|
328
|
+
npm run build
|
|
329
|
+
|
|
330
|
+
# Build in watch mode
|
|
279
331
|
npm run build:watch
|
|
280
332
|
|
|
281
|
-
# Clean
|
|
333
|
+
# Clean build artifacts
|
|
282
334
|
npm run clean
|
|
335
|
+
|
|
336
|
+
# Lint code
|
|
337
|
+
npm run lint
|
|
338
|
+
|
|
339
|
+
# Format code
|
|
340
|
+
npm run format
|
|
283
341
|
```
|
|
284
342
|
|
|
285
|
-
##
|
|
343
|
+
## Requirements
|
|
286
344
|
|
|
287
|
-
|
|
345
|
+
- Node.js >= 18.0.0 (for native fetch support)
|
|
346
|
+
- CortexDB gateway running locally or remotely
|
|
347
|
+
- Embedding provider configured (OpenAI, Gemini, etc.) if using vectorization
|
|
288
348
|
|
|
289
|
-
##
|
|
349
|
+
## Architecture
|
|
350
|
+
|
|
351
|
+
CortexDB integrates multiple technologies:
|
|
290
352
|
|
|
291
|
-
|
|
353
|
+
- **PostgreSQL**: Stores structured data and metadata
|
|
354
|
+
- **Qdrant**: Vector database for semantic search
|
|
355
|
+
- **MinIO**: Object storage for files
|
|
356
|
+
- **Docling**: Advanced document processing and text extraction
|
|
292
357
|
|
|
293
|
-
|
|
358
|
+
The SDK abstracts this complexity into a simple, unified API.
|
|
359
|
+
|
|
360
|
+
## License
|
|
294
361
|
|
|
295
|
-
-
|
|
296
|
-
- 🐛 [Issue Tracker](https://github.com/cortexdb/cortexdb/issues)
|
|
297
|
-
- 💬 [Discussions](https://github.com/cortexdb/cortexdb/discussions)
|
|
362
|
+
MIT License - see [LICENSE](./LICENSE) for details.
|
|
298
363
|
|
|
299
364
|
## Related
|
|
300
365
|
|
|
301
366
|
- [CortexDB Python SDK](../python) - Python client for CortexDB
|
|
302
|
-
- [CortexDB
|
|
367
|
+
- [CortexDB Documentation](../../docs) - Complete platform documentation
|
package/package.json
CHANGED