@dooor-ai/cortexdb 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +137 -60
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -1,15 +1,29 @@
1
1
  # CortexDB TypeScript SDK
2
2
 
3
- Official TypeScript/JavaScript client for CortexDB - Multi-modal RAG Platform.
3
+ Official TypeScript/JavaScript client for CortexDB.
4
+
5
+ ## What is CortexDB?
6
+
7
+ CortexDB is a multi-modal RAG (Retrieval-Augmented Generation) platform that combines traditional database capabilities with vector search and advanced document processing. It enables you to:
8
+
9
+ - Store structured and unstructured data in a unified database
10
+ - Automatically extract text from documents (PDF, DOCX, XLSX) using Docling
11
+ - Generate embeddings for semantic search using various providers (OpenAI, Gemini, etc.)
12
+ - Perform hybrid search combining filters with vector similarity
13
+ - Build RAG applications with automatic chunking and vectorization
14
+
15
+ CortexDB handles the complex infrastructure of vector databases (Qdrant), object storage (MinIO), and traditional databases (PostgreSQL) behind a simple API.
4
16
 
5
17
  ## Features
6
18
 
7
- - Full TypeScript support with type definitions
8
- - Async/await API using native fetch
9
- - Semantic search with vector embeddings
10
- - Collection and record management
11
- - Custom error types for better debugging
12
- - Works with Node.js 18+, Deno, and modern browsers
19
+ - **Multi-modal document processing**: Upload PDF, DOCX, and XLSX files and automatically extract text with OCR fallback
20
+ - **Semantic search**: Vector-based search using embeddings from OpenAI, Gemini, or custom providers
21
+ - **Automatic chunking**: Smart text splitting optimized for RAG applications
22
+ - **Flexible schema**: Define collections with typed fields (string, number, boolean, file, array)
23
+ - **Hybrid queries**: Combine exact filters with semantic search
24
+ - **Storage control**: Choose where each field is stored (PostgreSQL, Qdrant, MinIO)
25
+ - **Type-safe**: Full TypeScript support with comprehensive type definitions
26
+ - **Modern API**: Async/await using native fetch (Node.js 18+)
13
27
 
14
28
  ## Installation
15
29
 
@@ -39,28 +53,34 @@ async function main() {
39
53
  baseUrl: 'http://localhost:8000'
40
54
  });
41
55
 
42
- // Create a collection
43
- await client.collections.create('documents', [
44
- { name: 'title', type: FieldType.STRING },
45
- { name: 'content', type: FieldType.STRING, vectorize: true }
46
- ]);
56
+ // Create a collection with vectorization enabled
57
+ await client.collections.create(
58
+ 'documents',
59
+ [
60
+ { name: 'title', type: FieldType.STRING },
61
+ { name: 'content', type: FieldType.STRING, vectorize: true }
62
+ ],
63
+ 'your-embedding-provider-id' // Required when vectorize=true
64
+ );
47
65
 
48
66
  // Create a record
49
67
  const record = await client.records.create('documents', {
50
- title: 'Hello World',
51
- content: 'This is my first document'
68
+ title: 'Introduction to AI',
69
+ content: 'Artificial intelligence is transforming how we build software...'
52
70
  });
53
71
 
54
- // Semantic search
72
+ // Semantic search - finds relevant content by meaning, not just keywords
55
73
  const results = await client.records.search(
56
74
  'documents',
57
- 'hello world',
75
+ 'How is AI changing software development?',
58
76
  undefined,
59
77
  10
60
78
  );
61
79
 
62
80
  results.results.forEach(result => {
63
- console.log(`Score: ${result.score.toFixed(4)} - ${result.record.data.title}`);
81
+ console.log(`Score: ${result.score.toFixed(4)}`);
82
+ console.log(`Title: ${result.record.data.title}`);
83
+ console.log(`Content: ${result.record.data.content}\n`);
64
84
  });
65
85
 
66
86
  await client.close();
@@ -81,7 +101,7 @@ const client = new CortexClient({
81
101
  baseUrl: 'http://localhost:8000'
82
102
  });
83
103
 
84
- // With API key
104
+ // Production with API key
85
105
  const client = new CortexClient({
86
106
  baseUrl: 'https://api.cortexdb.com',
87
107
  apiKey: 'your-api-key'
@@ -96,44 +116,52 @@ const client = new CortexClient({
96
116
 
97
117
  ### Collections
98
118
 
119
+ Collections define the schema for your data. Each collection can have multiple fields with different types and storage options.
120
+
99
121
  ```typescript
100
122
  import { FieldType, StoreLocation } from '@dooor-ai/cortexdb';
101
123
 
102
- // Create collection
103
- const collection = await client.collections.create('articles', [
104
- {
105
- name: 'title',
106
- type: FieldType.STRING
107
- },
108
- {
109
- name: 'content',
110
- type: FieldType.STRING,
111
- vectorize: true // Enable semantic search
112
- },
113
- {
114
- name: 'year',
115
- type: FieldType.NUMBER,
116
- store_in: [StoreLocation.POSTGRES, StoreLocation.QDRANT_PAYLOAD]
117
- }
118
- ]);
124
+ // Create collection with vectorization
125
+ const collection = await client.collections.create(
126
+ 'articles',
127
+ [
128
+ {
129
+ name: 'title',
130
+ type: FieldType.STRING
131
+ },
132
+ {
133
+ name: 'content',
134
+ type: FieldType.STRING,
135
+ vectorize: true // Enable semantic search on this field
136
+ },
137
+ {
138
+ name: 'year',
139
+ type: FieldType.NUMBER,
140
+ store_in: [StoreLocation.POSTGRES, StoreLocation.QDRANT_PAYLOAD]
141
+ }
142
+ ],
143
+ 'embedding-provider-id' // Required when any field has vectorize=true
144
+ );
119
145
 
120
146
  // List collections
121
147
  const collections = await client.collections.list();
122
148
 
123
- // Get collection
149
+ // Get collection schema
124
150
  const schema = await client.collections.get('articles');
125
151
 
126
- // Delete collection
152
+ // Delete collection and all its records
127
153
  await client.collections.delete('articles');
128
154
  ```
129
155
 
130
156
  ### Records
131
157
 
158
+ Records are the actual data stored in collections. They must match the collection schema.
159
+
132
160
  ```typescript
133
161
  // Create record
134
162
  const record = await client.records.create('articles', {
135
163
  title: 'Machine Learning Basics',
136
- content: 'Introduction to ML concepts...',
164
+ content: 'Machine learning is a subset of AI that focuses on learning from data...',
137
165
  year: 2024
138
166
  });
139
167
 
@@ -148,7 +176,7 @@ const updated = await client.records.update('articles', record.id, {
148
176
  // Delete record
149
177
  await client.records.delete('articles', record.id);
150
178
 
151
- // List records
179
+ // List records with pagination
152
180
  const results = await client.records.list('articles', {
153
181
  limit: 10,
154
182
  offset: 0
@@ -157,8 +185,10 @@ const results = await client.records.list('articles', {
157
185
 
158
186
  ### Semantic Search
159
187
 
188
+ Semantic search finds records by meaning, not just exact keyword matches. It uses vector embeddings to understand context.
189
+
160
190
  ```typescript
161
- // Basic search
191
+ // Basic semantic search
162
192
  const results = await client.records.search(
163
193
  'articles',
164
194
  'machine learning fundamentals',
@@ -166,7 +196,7 @@ const results = await client.records.search(
166
196
  10
167
197
  );
168
198
 
169
- // Search with filters
199
+ // Search with filters - combine semantic search with exact matches
170
200
  const filteredResults = await client.records.search(
171
201
  'articles',
172
202
  'neural networks',
@@ -177,30 +207,55 @@ const filteredResults = await client.records.search(
177
207
  5
178
208
  );
179
209
 
180
- // Process results
210
+ // Process results - ordered by relevance score
181
211
  filteredResults.results.forEach(result => {
182
- console.log(`Score: ${result.score.toFixed(4)}`);
212
+ console.log(`Score: ${result.score.toFixed(4)}`); // Higher = more relevant
183
213
  console.log(`Title: ${result.record.data.title}`);
184
214
  console.log(`Year: ${result.record.data.year}`);
185
215
  });
186
216
  ```
187
217
 
218
+ ### Working with Files
219
+
220
+ CortexDB can process documents and automatically extract text for vectorization.
221
+
222
+ ```typescript
223
+ // Create collection with file field
224
+ await client.collections.create(
225
+ 'documents',
226
+ [
227
+ { name: 'title', type: FieldType.STRING },
228
+ {
229
+ name: 'document',
230
+ type: FieldType.FILE,
231
+ vectorize: true // Extract text and create embeddings
232
+ }
233
+ ],
234
+ 'embedding-provider-id'
235
+ );
236
+
237
+ // Note: File upload is currently supported through the REST API.
238
+ // File upload in the TypeScript SDK will be added in a future version.
239
+ ```
240
+
188
241
  ### Filter Operators
189
242
 
190
243
  ```typescript
191
- // Exact match
244
+ // Exact match filters
192
245
  const results = await client.records.list('articles', {
193
246
  filters: {
194
247
  category: 'technology',
195
- published: true
248
+ published: true,
249
+ year: 2024
196
250
  }
197
251
  });
198
252
 
199
- // Combine filters
253
+ // Combine multiple filters
200
254
  const filtered = await client.records.list('articles', {
201
255
  filters: {
202
256
  year: 2024,
203
- category: 'AI'
257
+ category: 'AI',
258
+ author: 'John Doe'
204
259
  },
205
260
  limit: 20
206
261
  });
@@ -208,12 +263,15 @@ const filtered = await client.records.list('articles', {
208
263
 
209
264
  ## Error Handling
210
265
 
266
+ The SDK provides specific error types for different failure scenarios.
267
+
211
268
  ```typescript
212
269
  import {
213
270
  CortexDBError,
214
271
  CortexDBNotFoundError,
215
272
  CortexDBValidationError,
216
- CortexDBConnectionError
273
+ CortexDBConnectionError,
274
+ CortexDBTimeoutError
217
275
  } from '@dooor-ai/cortexdb';
218
276
 
219
277
  try {
@@ -222,9 +280,11 @@ try {
222
280
  if (error instanceof CortexDBNotFoundError) {
223
281
  console.log('Record not found');
224
282
  } else if (error instanceof CortexDBValidationError) {
225
- console.log('Validation error:', error.message);
283
+ console.log('Invalid data:', error.message);
226
284
  } else if (error instanceof CortexDBConnectionError) {
227
285
  console.log('Connection failed:', error.message);
286
+ } else if (error instanceof CortexDBTimeoutError) {
287
+ console.log('Request timed out:', error.message);
228
288
  } else if (error instanceof CortexDBError) {
229
289
  console.log('General error:', error.message);
230
290
  }
@@ -233,11 +293,11 @@ try {
233
293
 
234
294
  ## Examples
235
295
 
236
- Check the [`examples/`](./examples) directory for more usage examples:
296
+ Check the [`examples/`](./examples) directory for complete working examples:
237
297
 
238
- - [`quickstart.ts`](./examples/quickstart.ts) - Walkthrough of SDK features
239
- - [`search.ts`](./examples/search.ts) - Semantic search with filters
240
- - [`basic.ts`](./examples/basic.ts) - Basic operations
298
+ - [`quickstart.ts`](./examples/quickstart.ts) - Complete walkthrough of SDK features
299
+ - [`search.ts`](./examples/search.ts) - Semantic search with filters and providers
300
+ - [`basic.ts`](./examples/basic.ts) - Basic CRUD operations
241
301
 
242
302
  Run examples:
243
303
 
@@ -264,27 +324,44 @@ npm run build
264
324
  ### Scripts
265
325
 
266
326
  ```bash
267
- # Build
327
+ # Build TypeScript
268
328
  npm run build
269
329
 
270
- # Watch mode
330
+ # Build in watch mode
271
331
  npm run build:watch
272
332
 
273
- # Clean
333
+ # Clean build artifacts
274
334
  npm run clean
275
335
 
276
- # Lint
336
+ # Lint code
277
337
  npm run lint
278
338
 
279
- # Format
339
+ # Format code
280
340
  npm run format
281
341
  ```
282
342
 
283
343
  ## Requirements
284
344
 
285
- - Node.js >= 18.0.0
286
- - CortexDB gateway running (local or remote)
345
+ - Node.js >= 18.0.0 (for native fetch support)
346
+ - CortexDB gateway running locally or remotely
347
+ - Embedding provider configured (OpenAI, Gemini, etc.) if using vectorization
348
+
349
+ ## Architecture
350
+
351
+ CortexDB integrates multiple technologies:
352
+
353
+ - **PostgreSQL**: Stores structured data and metadata
354
+ - **Qdrant**: Vector database for semantic search
355
+ - **MinIO**: Object storage for files
356
+ - **Docling**: Advanced document processing and text extraction
357
+
358
+ The SDK abstracts this complexity into a simple, unified API.
287
359
 
288
360
  ## License
289
361
 
290
362
  MIT License - see [LICENSE](./LICENSE) for details.
363
+
364
+ ## Related
365
+
366
+ - [CortexDB Python SDK](../python) - Python client for CortexDB
367
+ - [CortexDB Documentation](../../docs) - Complete platform documentation
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dooor-ai/cortexdb",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Official TypeScript/JavaScript SDK for CortexDB - Multi-modal RAG Platform with advanced document processing",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",