ruvector 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
  {
- "startTime": 1763752189752,
- "sessionId": "session-1763752189752",
- "lastActivity": 1763752189752,
+ "startTime": 1763756440172,
+ "sessionId": "session-1763756440172",
+ "lastActivity": 1763756440172,
  "sessionDuration": 0,
  "totalTasks": 1,
  "successfulTasks": 1,
@@ -1,10 +1,10 @@
  [
  {
- "id": "cmd-hooks-1763752189958",
+ "id": "cmd-hooks-1763756440446",
  "type": "hooks",
  "success": true,
- "duration": 12.741968000000043,
- "timestamp": 1763752189971,
+ "duration": 57.10047099999997,
+ "timestamp": 1763756440503,
  "metadata": {}
  }
  ]
package/README.md CHANGED
@@ -77,86 +77,214 @@ Ruvector is purpose-built for **modern JavaScript/TypeScript applications** that
  - šŸš€ **Production Ready**: Battle-tested algorithms with comprehensive benchmarks
  - šŸ”“ **Open Source**: MIT licensed, community-driven

- ## šŸš€ Quick Start
+ ## šŸš€ Quick Start Tutorial

- ### Installation
+ ### Step 1: Installation
+
+ Install Ruvector with a single npm command:

  ```bash
  npm install ruvector
  ```

- The package automatically installs the correct native module for your platform, or uses the WASM fallback if native is unavailable.
+ **What happens during installation:**
+ - npm automatically detects your platform (Linux, macOS, Windows)
+ - Downloads the correct native binary for maximum performance
+ - Falls back to WebAssembly if native binaries aren't available
+ - No additional setup, Docker, or external services required
+
+ **Verify installation:**
+ ```bash
+ npx ruvector info
+ ```
+
+ You should see your platform and implementation type (native Rust or WASM fallback).
+
+ ### Step 2: Your First Vector Database

- ### Basic Usage
+ Let's create a simple vector database and perform basic operations. This example demonstrates the complete CRUD (Create, Read, Update, Delete) workflow:

  ```javascript
  const { VectorDb } = require('ruvector');

- async function example() {
- // Create database with 128 dimensions
+ async function tutorial() {
+ // Step 2.1: Create a new vector database
+ // The 'dimensions' parameter must match your embedding model
+ // Common sizes: 128, 384 (sentence-transformers), 768 (BERT), 1536 (OpenAI)
  const db = new VectorDb({
- dimensions: 128,
- maxElements: 10000,
- storagePath: './vectors.db'
+ dimensions: 128, // Vector size - MUST match your embeddings
+ maxElements: 10000, // Maximum vectors (can grow automatically)
+ storagePath: './my-vectors.db' // Persist to disk (omit for in-memory)
  });

- // Insert a vector
- const vector = new Float32Array(128).map(() => Math.random());
- const id = await db.insert({
- id: 'doc_1',
- vector: vector,
- metadata: { title: 'Example Document' }
- });
+ console.log('āœ… Database created successfully');
+
+ // Step 2.2: Insert vectors
+ // In real applications, these would come from an embedding model
+ const documents = [
+ { id: 'doc1', text: 'Artificial intelligence and machine learning' },
+ { id: 'doc2', text: 'Deep learning neural networks' },
+ { id: 'doc3', text: 'Natural language processing' },
+ ];
+
+ for (const doc of documents) {
+ // Generate random vector for demonstration
+ // In production: use OpenAI, Cohere, or sentence-transformers
+ const vector = new Float32Array(128).map(() => Math.random());
+
+ await db.insert({
+ id: doc.id,
+ vector: vector,
+ metadata: {
+ text: doc.text,
+ timestamp: Date.now(),
+ category: 'AI'
+ }
+ });
+
+ console.log(`āœ… Inserted: ${doc.id}`);
+ }

- console.log(`Inserted vector with ID: ${id}`);
+ // Step 2.3: Search for similar vectors
+ // Create a query vector (in production, this would be from your search query)
+ const queryVector = new Float32Array(128).map(() => Math.random());

- // Search for similar vectors
  const results = await db.search({
- vector: vector,
- k: 10
+ vector: queryVector,
+ k: 5, // Return top 5 most similar vectors
+ threshold: 0.7 // Only return results with similarity > 0.7
  });

- console.log('Top 10 similar vectors:', results);
- // Output: [{ id: 'doc_1', score: 1.0, metadata: {...} }, ...]
+ console.log('\nšŸ” Search Results:');
+ results.forEach((result, index) => {
+ console.log(`${index + 1}. ${result.id} - Score: ${result.score.toFixed(3)}`);
+ console.log(` Text: ${result.metadata.text}`);
+ });

- // Get vector count
+ // Step 2.4: Retrieve a specific vector
+ const retrieved = await db.get('doc1');
+ if (retrieved) {
+ console.log('\nšŸ“„ Retrieved document:', retrieved.metadata.text);
+ }
+
+ // Step 2.5: Get database statistics
  const count = await db.len();
- console.log(`Total vectors: ${count}`);
+ console.log(`\nšŸ“Š Total vectors in database: ${count}`);
+
+ // Step 2.6: Delete a vector
+ const deleted = await db.delete('doc1');
+ console.log(`\nšŸ—‘ļø Deleted doc1: ${deleted ? 'Success' : 'Not found'}`);

- // Delete a vector
- const deleted = await db.delete('doc_1');
- console.log(`Deleted: ${deleted}`);
+ // Final count
+ const finalCount = await db.len();
+ console.log(`šŸ“Š Final count: ${finalCount}`);
  }

- example();
+ // Run the tutorial
+ tutorial().catch(console.error);
+ ```
+
+ **Expected Output:**
+ ```
+ āœ… Database created successfully
+ āœ… Inserted: doc1
+ āœ… Inserted: doc2
+ āœ… Inserted: doc3
+
+ šŸ” Search Results:
+ 1. doc2 - Score: 0.892
+ Text: Deep learning neural networks
+ 2. doc1 - Score: 0.856
+ Text: Artificial intelligence and machine learning
+ 3. doc3 - Score: 0.801
+ Text: Natural language processing
+
+ šŸ“„ Retrieved document: Artificial intelligence and machine learning
+
+ šŸ“Š Total vectors in database: 3
+
+ šŸ—‘ļø Deleted doc1: Success
+ šŸ“Š Final count: 2
  ```

- ### TypeScript Support
+ ### Step 3: TypeScript Tutorial

- Full TypeScript support with complete type definitions:
+ Ruvector provides full TypeScript support with complete type safety. Here's how to use it:

  ```typescript
  import { VectorDb, VectorEntry, SearchQuery, SearchResult } from 'ruvector';

- const db = new VectorDb({
- dimensions: 128,
- maxElements: 10000,
- storagePath: './vectors.db'
- });
+ // Step 3.1: Define your custom metadata type
+ interface DocumentMetadata {
+ title: string;
+ content: string;
+ author: string;
+ date: Date;
+ tags: string[];
+ }

- // Fully typed operations
- const entry: VectorEntry = {
- id: 'doc_1',
- vector: new Float32Array(128),
- metadata: { title: 'Example' }
- };
+ async function typescriptTutorial() {
+ // Step 3.2: Create typed database
+ const db = new VectorDb({
+ dimensions: 384, // sentence-transformers/all-MiniLM-L6-v2
+ maxElements: 10000,
+ storagePath: './typed-vectors.db'
+ });

- const results: SearchResult[] = await db.search({
- vector: new Float32Array(128),
- k: 10
- });
+ // Step 3.3: Type-safe vector entry
+ const entry: VectorEntry<DocumentMetadata> = {
+ id: 'article-001',
+ vector: new Float32Array(384), // Your embedding here
+ metadata: {
+ title: 'Introduction to Vector Databases',
+ content: 'Vector databases enable semantic search...',
+ author: 'Jane Doe',
+ date: new Date('2024-01-15'),
+ tags: ['database', 'AI', 'search']
+ }
+ };
+
+ // Step 3.4: Insert with type checking
+ await db.insert(entry);
+ console.log('āœ… Inserted typed document');
+
+ // Step 3.5: Type-safe search
+ const query: SearchQuery = {
+ vector: new Float32Array(384),
+ k: 10,
+ threshold: 0.8
+ };
+
+ // Step 3.6: Fully typed results
+ const results: SearchResult<DocumentMetadata>[] = await db.search(query);
+
+ // TypeScript knows the exact shape of metadata
+ results.forEach(result => {
+ console.log(`Title: ${result.metadata.title}`);
+ console.log(`Author: ${result.metadata.author}`);
+ console.log(`Tags: ${result.metadata.tags.join(', ')}`);
+ console.log(`Similarity: ${result.score.toFixed(3)}\n`);
+ });
+
+ // Step 3.7: Type-safe retrieval
+ const doc = await db.get('article-001');
+ if (doc) {
+ // TypeScript autocomplete works perfectly here
+ const publishYear = doc.metadata.date.getFullYear();
+ console.log(`Published in ${publishYear}`);
+ }
+ }
+
+ typescriptTutorial().catch(console.error);
  ```

+ **TypeScript Benefits:**
+ - āœ… Full autocomplete for all methods and properties
+ - āœ… Compile-time type checking prevents errors
+ - āœ… IDE IntelliSense shows documentation
+ - āœ… Custom metadata types for your use case
+ - āœ… No `any` types - fully typed throughout
+
  ## šŸŽÆ Platform Detection

  Ruvector automatically detects the best implementation for your platform:
@@ -379,136 +507,736 @@ Comprehensive comparison of Ruvector against popular vector database solutions:
  - āœ… Built-in persistence and metadata
  - āš ļø Slightly lower recall at same performance

- ## šŸŽÆ Use Cases
+ ## šŸŽÆ Real-World Tutorials
+
+ ### Tutorial 1: Building a RAG System with OpenAI

- ### RAG Systems (Retrieval-Augmented Generation)
+ **What you'll learn:** Create a production-ready Retrieval-Augmented Generation system that enhances LLM responses with relevant context from your documents.
+
+ **Prerequisites:**
+ ```bash
+ npm install ruvector openai
+ export OPENAI_API_KEY="your-api-key-here"
+ ```
+
+ **Complete Implementation:**

  ```javascript
  const { VectorDb } = require('ruvector');
- const openai = require('openai');
+ const OpenAI = require('openai');
+
+ class RAGSystem {
+ constructor() {
+ // Initialize OpenAI client
+ this.openai = new OpenAI({
+ apiKey: process.env.OPENAI_API_KEY
+ });

- const db = new VectorDb({ dimensions: 1536 }); // OpenAI ada-002
+ // Create vector database for OpenAI embeddings
+ // text-embedding-ada-002 produces 1536-dimensional vectors
+ this.db = new VectorDb({
+ dimensions: 1536,
+ maxElements: 100000,
+ storagePath: './rag-knowledge-base.db'
+ });
+
+ console.log('āœ… RAG System initialized');
+ }

- async function indexDocuments(texts) {
- for (const text of texts) {
- const embedding = await openai.embeddings.create({
+ // Step 1: Index your knowledge base
+ async indexDocuments(documents) {
+ console.log(`šŸ“š Indexing ${documents.length} documents...`);
+
+ for (let i = 0; i < documents.length; i++) {
+ const doc = documents[i];
+
+ // Generate embedding for the document
+ const response = await this.openai.embeddings.create({
+ model: 'text-embedding-ada-002',
+ input: doc.content
+ });
+
+ // Store in vector database
+ await this.db.insert({
+ id: doc.id || `doc_${i}`,
+ vector: new Float32Array(response.data[0].embedding),
+ metadata: {
+ title: doc.title,
+ content: doc.content,
+ source: doc.source,
+ date: doc.date || new Date().toISOString()
+ }
+ });
+
+ console.log(` āœ… Indexed: ${doc.title}`);
+ }
+
+ const count = await this.db.len();
+ console.log(`\nāœ… Indexed ${count} documents total`);
+ }
+
+ // Step 2: Retrieve relevant context for a query
+ async retrieveContext(query, k = 3) {
+ console.log(`šŸ” Searching for: "${query}"`);
+
+ // Generate embedding for the query
+ const response = await this.openai.embeddings.create({
  model: 'text-embedding-ada-002',
- input: text
+ input: query
  });

- await db.insert({
- id: text.slice(0, 20),
- vector: new Float32Array(embedding.data[0].embedding),
- metadata: { text }
+ // Search for similar documents
+ const results = await this.db.search({
+ vector: new Float32Array(response.data[0].embedding),
+ k: k,
+ threshold: 0.7 // Only use highly relevant results
  });
+
+ console.log(`šŸ“„ Found ${results.length} relevant documents\n`);
+
+ return results.map(r => ({
+ content: r.metadata.content,
+ title: r.metadata.title,
+ score: r.score
+ }));
  }
- }

- async function search(query) {
- const embedding = await openai.embeddings.create({
- model: 'text-embedding-ada-002',
- input: query
- });
+ // Step 3: Generate answer with retrieved context
+ async answer(question) {
+ // Retrieve relevant context
+ const context = await this.retrieveContext(question, 3);

- return await db.search({
- vector: new Float32Array(embedding.data[0].embedding),
- k: 5
- });
+ if (context.length === 0) {
+ return "I don't have enough information to answer that question.";
+ }
+
+ // Build prompt with context
+ const contextText = context
+ .map((doc, i) => `[${i + 1}] ${doc.title}\n${doc.content}`)
+ .join('\n\n');
+
+ const prompt = `Answer the question based on the following context. If the context doesn't contain the answer, say so.
+
+ Context:
+ ${contextText}
+
+ Question: ${question}
+
+ Answer:`;
+
+ console.log('šŸ¤– Generating answer...\n');
+
+ // Generate completion
+ const completion = await this.openai.chat.completions.create({
+ model: 'gpt-4',
+ messages: [
+ { role: 'system', content: 'You are a helpful assistant that answers questions based on provided context.' },
+ { role: 'user', content: prompt }
+ ],
+ temperature: 0.3 // Lower temperature for more factual responses
+ });
+
+ return {
+ answer: completion.choices[0].message.content,
+ sources: context.map(c => c.title)
+ };
+ }
  }
+
+ // Example Usage
+ async function main() {
+ const rag = new RAGSystem();
+
+ // Step 1: Index your knowledge base
+ const documents = [
+ {
+ id: 'doc1',
+ title: 'Ruvector Introduction',
+ content: 'Ruvector is a high-performance vector database for Node.js built in Rust. It provides sub-millisecond query latency and supports over 52,000 inserts per second.',
+ source: 'documentation'
+ },
+ {
+ id: 'doc2',
+ title: 'Vector Databases Explained',
+ content: 'Vector databases store data as high-dimensional vectors, enabling semantic similarity search. They are essential for AI applications like RAG systems and recommendation engines.',
+ source: 'blog'
+ },
+ {
+ id: 'doc3',
+ title: 'HNSW Algorithm',
+ content: 'Hierarchical Navigable Small World (HNSW) is a graph-based algorithm for approximate nearest neighbor search. It provides excellent recall with low latency.',
+ source: 'research'
+ }
+ ];
+
+ await rag.indexDocuments(documents);
+
+ // Step 2: Ask questions
+ console.log('\n' + '='.repeat(60) + '\n');
+
+ const result = await rag.answer('What is Ruvector and what are its performance characteristics?');
+
+ console.log('šŸ“ Answer:', result.answer);
+ console.log('\nšŸ“š Sources:', result.sources.join(', '));
+ }
+
+ main().catch(console.error);
  ```

- ### Semantic Search
+ **Expected Output:**
+ ```
+ āœ… RAG System initialized
+ šŸ“š Indexing 3 documents...
+ āœ… Indexed: Ruvector Introduction
+ āœ… Indexed: Vector Databases Explained
+ āœ… Indexed: HNSW Algorithm

- ```javascript
- const { VectorDb } = require('ruvector');
+ āœ… Indexed 3 documents total

- // Create database for document embeddings
- const db = new VectorDb({
- dimensions: 384, // sentence-transformers
- storagePath: './documents.db'
- });
+ ============================================================

- // Index documents
- await db.insert({
- id: 'doc1',
- vector: embeddingModel.encode('Artificial intelligence is transforming industries'),
- metadata: {
- title: 'AI Revolution',
- content: 'Artificial intelligence is transforming industries...',
- author: 'John Doe',
- date: '2024-01-15'
- }
- });
+ šŸ” Searching for: "What is Ruvector and what are its performance characteristics?"
+ šŸ“„ Found 2 relevant documents

- // Search with metadata filtering
- const results = await db.search({
- vector: embeddingModel.encode('machine learning applications'),
- k: 10,
- filter: { author: 'John Doe' }
- });
+ šŸ¤– Generating answer...
+
+ šŸ“ Answer: Ruvector is a high-performance vector database built in Rust for Node.js applications. Its key performance characteristics include:
+ - Sub-millisecond query latency
+ - Over 52,000 inserts per second
+ - Optimized for semantic similarity search
+
+ šŸ“š Sources: Ruvector Introduction, Vector Databases Explained
  ```

- ### Agent Memory (Reflexion)
+ **Production Tips:**
+ - āœ… Use batch embedding for better throughput (OpenAI supports up to 2048 texts)
+ - āœ… Implement caching for frequently asked questions
+ - āœ… Add error handling for API rate limits
+ - āœ… Monitor token usage and costs
+ - āœ… Regularly update your knowledge base
+
+ ---
+
+ ### Tutorial 2: Semantic Search Engine
+
+ **What you'll learn:** Build a semantic search engine that understands meaning, not just keywords.
+
+ **Prerequisites:**
+ ```bash
+ npm install ruvector @xenova/transformers
+ ```
+
+ **Complete Implementation:**

  ```javascript
  const { VectorDb } = require('ruvector');
+ const { pipeline } = require('@xenova/transformers');

- // Create memory store for AI agent
- const memory = new VectorDb({
- dimensions: 768,
- storagePath: './agent-memory.db'
- });
+ class SemanticSearchEngine {
+ constructor() {
+ this.db = null;
+ this.embedder = null;
+ }
+
+ // Step 1: Initialize the embedding model
+ async initialize() {
+ console.log('šŸš€ Initializing semantic search engine...');
+
+ // Load sentence-transformers model (runs locally, no API needed!)
+ console.log('šŸ“„ Loading embedding model...');
+ this.embedder = await pipeline(
+ 'feature-extraction',
+ 'Xenova/all-MiniLM-L6-v2'
+ );
+
+ // Create vector database (384 dimensions for all-MiniLM-L6-v2)
+ this.db = new VectorDb({
+ dimensions: 384,
+ maxElements: 50000,
+ storagePath: './semantic-search.db'
+ });

- // Store agent experiences
- await memory.insert({
- id: `exp_${Date.now()}`,
- vector: embedExperience(experience),
- metadata: {
- action: 'navigate',
- result: 'success',
- timestamp: Date.now(),
- reward: 1.0
+ console.log('āœ… Search engine ready!\n');
  }
- });

- // Retrieve similar experiences
- const similarExperiences = await memory.search({
- vector: embedCurrentState(state),
- k: 5
- });
+ // Step 2: Generate embeddings
+ async embed(text) {
+ const output = await this.embedder(text, {
+ pooling: 'mean',
+ normalize: true
+ });
+
+ // Convert to Float32Array
+ return new Float32Array(output.data);
+ }
+
+ // Step 3: Index documents
+ async indexDocuments(documents) {
+ console.log(`šŸ“š Indexing ${documents.length} documents...`);
+
+ for (const doc of documents) {
+ const vector = await this.embed(doc.content);
+
+ await this.db.insert({
+ id: doc.id,
+ vector: vector,
+ metadata: {
+ title: doc.title,
+ content: doc.content,
+ category: doc.category,
+ url: doc.url
+ }
+ });
+
+ console.log(` āœ… ${doc.title}`);
+ }
+
+ const count = await this.db.len();
+ console.log(`\nāœ… Indexed ${count} documents\n`);
+ }
+
+ // Step 4: Semantic search
+ async search(query, options = {}) {
+ const {
+ k = 5,
+ category = null,
+ threshold = 0.3
+ } = options;
+
+ console.log(`šŸ” Searching for: "${query}"`);
+
+ // Generate query embedding
+ const queryVector = await this.embed(query);
+
+ // Search vector database
+ const results = await this.db.search({
+ vector: queryVector,
+ k: k * 2, // Get more results for filtering
+ threshold: threshold
+ });
+
+ // Filter by category if specified
+ let filtered = results;
+ if (category) {
+ filtered = results.filter(r => r.metadata.category === category);
+ }
+
+ // Return top k after filtering
+ const final = filtered.slice(0, k);
+
+ console.log(`šŸ“„ Found ${final.length} results\n`);
+
+ return final.map(r => ({
+ id: r.id,
+ title: r.metadata.title,
+ content: r.metadata.content,
+ category: r.metadata.category,
+ score: r.score,
+ url: r.metadata.url
+ }));
+ }
+
+ // Step 5: Find similar documents
+ async findSimilar(documentId, k = 5) {
+ const doc = await this.db.get(documentId);
+
+ if (!doc) {
+ throw new Error(`Document ${documentId} not found`);
+ }
+
+ const results = await this.db.search({
+ vector: doc.vector,
+ k: k + 1 // +1 because the document itself will be included
+ });
+
+ // Remove the document itself from results
+ return results
+ .filter(r => r.id !== documentId)
+ .slice(0, k);
+ }
+ }
+
+ // Example Usage
+ async function main() {
+ const engine = new SemanticSearchEngine();
+ await engine.initialize();
+
+ // Sample documents (in production, load from your database)
+ const documents = [
+ {
+ id: '1',
+ title: 'Understanding Neural Networks',
+ content: 'Neural networks are computing systems inspired by biological neural networks. They learn to perform tasks by considering examples.',
+ category: 'AI',
+ url: '/docs/neural-networks'
+ },
+ {
+ id: '2',
+ title: 'Introduction to Machine Learning',
+ content: 'Machine learning is a subset of artificial intelligence that provides systems the ability to learn and improve from experience.',
+ category: 'AI',
+ url: '/docs/machine-learning'
+ },
+ {
+ id: '3',
+ title: 'Web Development Best Practices',
+ content: 'Modern web development involves responsive design, performance optimization, and accessibility considerations.',
+ category: 'Web',
+ url: '/docs/web-dev'
+ },
+ {
+ id: '4',
+ title: 'Deep Learning Applications',
+ content: 'Deep learning has revolutionized computer vision, natural language processing, and speech recognition.',
+ category: 'AI',
+ url: '/docs/deep-learning'
+ }
+ ];
+
+ // Index documents
+ await engine.indexDocuments(documents);
+
+ // Example 1: Basic semantic search
+ console.log('Example 1: Basic Search\n' + '='.repeat(60));
+ const results1 = await engine.search('AI and neural nets');
+ results1.forEach((result, i) => {
+ console.log(`${i + 1}. ${result.title} (Score: ${result.score.toFixed(3)})`);
+ console.log(` ${result.content.slice(0, 80)}...`);
+ console.log(` Category: ${result.category}\n`);
+ });
+
+ // Example 2: Category-filtered search
+ console.log('\nExample 2: Category-Filtered Search\n' + '='.repeat(60));
+ const results2 = await engine.search('learning algorithms', {
+ category: 'AI',
+ k: 3
+ });
+ results2.forEach((result, i) => {
+ console.log(`${i + 1}. ${result.title} (Score: ${result.score.toFixed(3)})`);
+ });
+
+ // Example 3: Find similar documents
+ console.log('\n\nExample 3: Find Similar Documents\n' + '='.repeat(60));
+ const similar = await engine.findSimilar('1', 2);
+ console.log('Documents similar to "Understanding Neural Networks":');
+ similar.forEach((doc, i) => {
+ console.log(`${i + 1}. ${doc.metadata.title} (Score: ${doc.score.toFixed(3)})`);
+ });
+ }
+
+ main().catch(console.error);
  ```

- ### Product Recommendations
+ **Key Features:**
+ - āœ… Runs completely locally (no API keys needed)
+ - āœ… Understands semantic meaning, not just keywords
+ - āœ… Category filtering for better results
+ - āœ… "Find similar" functionality
+ - āœ… Fast: ~10ms query latency
+
+ ---
+
+ ### Tutorial 3: AI Agent Memory System
+
+ **What you'll learn:** Implement a memory system for AI agents that remembers past experiences and learns from them.
+
+ **Complete Implementation:**

  ```javascript
  const { VectorDb } = require('ruvector');

- // Create product embedding database
- const products = new VectorDb({
- dimensions: 256,
- storagePath: './products.db'
- });
+ class AgentMemory {
+ constructor(agentId) {
+ this.agentId = agentId;
+
+ // Create separate databases for different memory types
+ this.episodicMemory = new VectorDb({
+ dimensions: 768,
+ storagePath: `./memory/${agentId}-episodic.db`
+ });

- // Index product embeddings
- await products.insert({
- id: 'prod_123',
- vector: productEmbedding,
- metadata: {
- name: 'Wireless Headphones',
- category: 'Electronics',
- price: 99.99,
- rating: 4.5
+ this.semanticMemory = new VectorDb({
+ dimensions: 768,
+ storagePath: `./memory/${agentId}-semantic.db`
+ });
+
+ console.log(`🧠 Memory system initialized for agent: ${agentId}`);
  }
- });

- // Get personalized recommendations
- const recommendations = await products.search({
- vector: userPreferenceEmbedding,
- k: 10,
- threshold: 0.7
- });
+ // Step 1: Store an experience (episodic memory)
+ async storeExperience(experience) {
+ const {
+ state,
+ action,
+ result,
+ reward,
+ embedding
+ } = experience;
+
+ const experienceId = `exp_${Date.now()}_${Math.random()}`;
+
+ await this.episodicMemory.insert({
+ id: experienceId,
+ vector: new Float32Array(embedding),
+ metadata: {
+ state: state,
+ action: action,
+ result: result,
+ reward: reward,
+ timestamp: Date.now(),
+ type: 'episodic'
+ }
+ });
+
+ console.log(`šŸ’¾ Stored experience: ${action} -> ${result} (reward: ${reward})`);
+ return experienceId;
+ }
+
+ // Step 2: Store learned knowledge (semantic memory)
+ async storeKnowledge(knowledge) {
+ const {
+ concept,
+ description,
+ embedding,
+ confidence = 1.0
+ } = knowledge;
+
+ const knowledgeId = `know_${Date.now()}`;
+
+ await this.semanticMemory.insert({
+ id: knowledgeId,
+ vector: new Float32Array(embedding),
+ metadata: {
+ concept: concept,
+ description: description,
+ confidence: confidence,
+ learned: Date.now(),
+ uses: 0,
+ type: 'semantic'
+ }
+ });
+
+ console.log(`šŸ“š Learned: ${concept}`);
+ return knowledgeId;
+ }
+
+ // Step 3: Recall similar experiences
+ async recallExperiences(currentState, k = 5) {
+ console.log(`šŸ” Recalling similar experiences...`);
+
+ const results = await this.episodicMemory.search({
+ vector: new Float32Array(currentState.embedding),
+ k: k,
+ threshold: 0.6 // Only recall reasonably similar experiences
+ });
+
+ // Sort by reward to prioritize successful experiences
+ const sorted = results.sort((a, b) => b.metadata.reward - a.metadata.reward);
+
+ console.log(`šŸ“ Recalled ${sorted.length} relevant experiences`);
+
+ return sorted.map(r => ({
+ state: r.metadata.state,
+ action: r.metadata.action,
+ result: r.metadata.result,
+ reward: r.metadata.reward,
+ similarity: r.score
+ }));
+ }
+
+ // Step 4: Query knowledge base
+ async queryKnowledge(query, k = 3) {
+ const results = await this.semanticMemory.search({
+ vector: new Float32Array(query.embedding),
+ k: k
+ });
+
+ // Update usage statistics
+ for (const result of results) {
+ const knowledge = await this.semanticMemory.get(result.id);
+ if (knowledge) {
+ knowledge.metadata.uses += 1;
+ // In production, update the entry
+ }
+ }
+
+ return results.map(r => ({
+ concept: r.metadata.concept,
+ description: r.metadata.description,
+ confidence: r.metadata.confidence,
+ relevance: r.score
+ }));
+ }
+
+ // Step 5: Reflect and learn from experiences
+ async reflect() {
+ console.log('\nšŸ¤” Reflecting on experiences...');
+
+ // Get all experiences
+ const totalExperiences = await this.episodicMemory.len();
+ console.log(`šŸ“Š Total experiences: ${totalExperiences}`);
+
+ // Analyze success rate
+ // In production, you'd aggregate experiences and extract patterns
+ console.log('šŸ’” Analysis complete');
+
+ return {
+ totalExperiences: totalExperiences,
+ knowledgeItems: await this.semanticMemory.len()
+ };
+ }
+
+ // Step 6: Get memory statistics
+ async getStats() {
+ return {
+ episodicMemorySize: await this.episodicMemory.len(),
+ semanticMemorySize: await this.semanticMemory.len(),
+ agentId: this.agentId
+ };
+ }
+ }
+
+ // Example Usage: Simulated agent learning to navigate
+ async function main() {
+ const agent = new AgentMemory('agent-001');
+
+ // Simulate embedding function (in production, use a real model)
+ function embed(text) {
+ return Array(768).fill(0).map(() => Math.random());
+ }
+
+ console.log('\n' + '='.repeat(60));
+ console.log('PHASE 1: Learning from experiences');
+ console.log('='.repeat(60) + '\n');
+
+ // Store some experiences
+ await agent.storeExperience({
+ state: { location: 'room1', goal: 'room3' },
+ action: 'move_north',
+ result: 'reached room2',
+ reward: 0.5,
+ embedding: embed('navigating from room1 to room2')
+ });
+
+ await agent.storeExperience({
+ state: { location: 'room2', goal: 'room3' },
+ action: 'move_east',
+ result: 'reached room3',
+ reward: 1.0,
+ embedding: embed('navigating from room2 to room3')
+ });
+
+ await agent.storeExperience({
+ state: { location: 'room1', goal: 'room3' },
+ action: 'move_south',
+ result: 'hit wall',
+ reward: -0.5,
+ embedding: embed('failed navigation attempt')
+ });
+
+ // Store learned knowledge
+ await agent.storeKnowledge({
+ concept: 'navigation_strategy',
+ description: 'Moving north then east is efficient for reaching room3 from room1',
+ embedding: embed('navigation strategy knowledge'),
+ confidence: 0.9
+ });
+
+ console.log('\n' + '='.repeat(60));
+ console.log('PHASE 2: Applying memory');
+ console.log('='.repeat(60) + '\n');
+
+ // Agent encounters a similar situation
+ const currentState = {
+ location: 'room1',
+ goal: 'room3',
+ embedding: embed('navigating from room1 to room3')
+ };
+
+ // Recall relevant experiences
+ const experiences = await agent.recallExperiences(currentState, 3);
+
+ console.log('\nšŸ“– Recalled experiences:');
+ experiences.forEach((exp, i) => {
+ console.log(`${i + 1}. Action: ${exp.action} | Result: ${exp.result} | Reward: ${exp.reward} | Similarity: ${exp.similarity.toFixed(3)}`);
+ });
+
+ // Query relevant knowledge
+ const knowledge = await agent.queryKnowledge({
+ embedding: embed('how to navigate efficiently')
+ }, 2);
+
+ console.log('\nšŸ“š Relevant knowledge:');
+ knowledge.forEach((k, i) => {
+ console.log(`${i + 1}. ${k.concept}: ${k.description} (confidence: ${k.confidence})`);
+ });
+
+ console.log('\n' + '='.repeat(60));
+ console.log('PHASE 3: Reflection');
+ console.log('='.repeat(60) + '\n');
+
+ // Reflect on learning
+ const stats = await agent.reflect();
+ const memoryStats = await agent.getStats();
+
+ console.log('\nšŸ“Š Memory Statistics:');
+ console.log(` Episodic memories: ${memoryStats.episodicMemorySize}`);
+ console.log(` Semantic knowledge: ${memoryStats.semanticMemorySize}`);
+ console.log(` Agent ID: ${memoryStats.agentId}`);
+ }
+
+ main().catch(console.error);
  ```

+ **Expected Output:**
+ ```
+ 🧠 Memory system initialized for agent: agent-001
+
+ ============================================================
+ PHASE 1: Learning from experiences
+ ============================================================
+
+ šŸ’¾ Stored experience: move_north -> reached room2 (reward: 0.5)
+ šŸ’¾ Stored experience: move_east -> reached room3 (reward: 1.0)
+ šŸ’¾ Stored experience: move_south -> hit wall (reward: -0.5)
+ šŸ“š Learned: navigation_strategy
+
+ ============================================================
+ PHASE 2: Applying memory
+ ============================================================
+
+ šŸ” Recalling similar experiences...
+ šŸ“ Recalled 3 relevant experiences
+
+ šŸ“– Recalled experiences:
+ 1. Action: move_east | Result: reached room3 | Reward: 1.0 | Similarity: 0.892
+ 2. Action: move_north | Result: reached room2 | Reward: 0.5 | Similarity: 0.876
+ 3. Action: move_south | Result: hit wall | Reward: -0.5 | Similarity: 0.654
+
+ šŸ“š Relevant knowledge:
+ 1. navigation_strategy: Moving north then east is efficient for reaching room3 from room1 (confidence: 0.9)
+
+ ============================================================
+ PHASE 3: Reflection
+ ============================================================
+
+ šŸ¤” Reflecting on experiences...
+ šŸ“Š Total experiences: 3
+ šŸ’” Analysis complete
+
+ šŸ“Š Memory Statistics:
+ Episodic memories: 3
+ Semantic knowledge: 1
+ Agent ID: agent-001
+ ```
+
+ **Use Cases:**
+ - āœ… Reinforcement learning agents
+ - āœ… Chatbot conversation history
+ - āœ… Game AI that learns from gameplay
+ - āœ… Personal assistant memory
+ - āœ… Robotic navigation systems
+
  ## šŸ—ļø API Reference

  ### Constructor
package/dist/index.js CHANGED
@@ -31,7 +31,7 @@ let implementation;
  let implementationType = 'wasm';
  try {
  // Try to load native module first
- implementation = require('@ruvector/core');
+ implementation = require('ruvector-core');
  implementationType = 'native';
  // Verify it's actually working
  if (typeof implementation.VectorDB !== 'function') {
@@ -45,7 +45,7 @@ catch (e) {
  console.warn('[ruvector] Falling back to WASM implementation');
  }
  try {
- implementation = require('@ruvector/wasm');
+ implementation = require('ruvector-wasm');
  implementationType = 'wasm';
  }
  catch (wasmError) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "ruvector",
- "version": "0.1.3",
+ "version": "0.1.5",
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",