ruvector 0.1.38 → 0.1.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-flow/metrics/agent-metrics.json +1 -0
- package/.claude-flow/metrics/performance.json +87 -0
- package/.claude-flow/metrics/task-metrics.json +10 -0
- package/PACKAGE_SUMMARY.md +409 -0
- package/README.md +1679 -508
- package/bin/cli.js +2427 -0
- package/dist/core/agentdb-fast.d.ts +149 -0
- package/dist/core/agentdb-fast.d.ts.map +1 -0
- package/dist/core/agentdb-fast.js +301 -0
- package/dist/core/attention-fallbacks.d.ts +221 -0
- package/dist/core/attention-fallbacks.d.ts.map +1 -0
- package/dist/core/attention-fallbacks.js +361 -0
- package/dist/core/gnn-wrapper.d.ts +143 -0
- package/dist/core/gnn-wrapper.d.ts.map +1 -0
- package/dist/core/gnn-wrapper.js +213 -0
- package/dist/core/index.d.ts +15 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +39 -0
- package/dist/core/sona-wrapper.d.ts +215 -0
- package/dist/core/sona-wrapper.d.ts.map +1 -0
- package/dist/core/sona-wrapper.js +258 -0
- package/dist/index.d.ts +87 -82
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +169 -89
- package/dist/services/embedding-service.d.ts +136 -0
- package/dist/services/embedding-service.d.ts.map +1 -0
- package/dist/services/embedding-service.js +294 -0
- package/dist/services/index.d.ts +6 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +26 -0
- package/dist/types.d.ts +145 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/examples/api-usage.js +211 -0
- package/examples/cli-demo.sh +85 -0
- package/package.json +41 -93
- package/bin/ruvector.js +0 -1150
- package/dist/index.d.mts +0 -95
- package/dist/index.mjs +0 -5
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Embedding Service - Unified embedding generation and management
|
|
4
|
+
*
|
|
5
|
+
* This service provides a unified interface for generating, caching, and
|
|
6
|
+
* managing embeddings from various sources (local models, APIs, etc.)
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.EmbeddingService = exports.LocalNGramProvider = exports.MockEmbeddingProvider = void 0;
|
|
10
|
+
exports.createEmbeddingService = createEmbeddingService;
|
|
11
|
+
exports.getDefaultEmbeddingService = getDefaultEmbeddingService;
|
|
12
|
+
/**
 * Derive a compact cache-key token from a string.
 *
 * Every UTF-16 code unit is folded into a signed 32-bit accumulator
 * (acc * 31 + unit, wrapping on overflow); the result is rendered in
 * base 36 behind an "h" prefix. Distinct strings can share a token.
 *
 * @param text - String to hash
 * @returns Token such as "h2p" (or "h-..." when the accumulator is negative)
 */
function hashText(text) {
    let acc = 0;
    let pos = 0;
    while (pos < text.length) {
        // Math.imul(31, acc) is ((acc << 5) - acc) reduced to 32 bits.
        acc = (Math.imul(31, acc) + text.charCodeAt(pos)) | 0;
        pos += 1;
    }
    return 'h' + acc.toString(36);
}
|
|
24
|
+
/**
 * Test-oriented provider that fabricates embeddings from the text alone.
 *
 * The same text always yields the same vector; no model or network is used.
 */
class MockEmbeddingProvider {
    /**
     * @param dimensions - Length of every generated vector (default 384)
     */
    constructor(dimensions = 384) {
        this.dimensions = dimensions;
        this.name = 'mock';
    }
    /**
     * Build one pseudo-random, length-normalised vector per input text.
     *
     * @param texts - Array of strings
     * @returns Promise resolving to an array of number arrays
     */
    async embed(texts) {
        const dims = this.dimensions;
        const toVector = (text) => {
            // Seed: 32-bit rolling hash over the UTF-16 code units.
            let state = 0;
            for (let pos = 0; pos < text.length; pos += 1) {
                state = ((state << 5) - state + text.charCodeAt(pos)) | 0;
            }
            // Congruential step per component, mapped to a small range around 0.
            const raw = [];
            while (raw.length < dims) {
                state = (state * 1103515245 + 12345) | 0;
                raw.push((state % 1000) / 1000 - 0.5);
            }
            // Scale to unit length; an all-zero vector is left untouched.
            let sumSquares = 0;
            for (const value of raw) {
                sumSquares += value * value;
            }
            const scale = Math.sqrt(sumSquares) || 1;
            return raw.map((value) => value / scale);
        };
        return texts.map(toVector);
    }
    /**
     * @returns The vector length configured at construction
     */
    getDimensions() {
        return this.dimensions;
    }
}
|
|
53
|
+
exports.MockEmbeddingProvider = MockEmbeddingProvider;
|
|
54
|
+
/**
 * Dependency-free fallback provider based on hashed character n-grams.
 *
 * Text is lower-cased, every character outside a-z / 0-9 becomes a space,
 * and each sliding window of `ngramSize` characters votes +1 or -1 into
 * one slot of a fixed-length vector.
 */
class LocalNGramProvider {
    /**
     * @param dimensions - Length of the produced vectors (default 256)
     * @param ngramSize - Characters per n-gram window (default 3)
     */
    constructor(dimensions = 256, ngramSize = 3) {
        this.ngramSize = ngramSize;
        this.dimensions = dimensions;
        this.name = 'local-ngram';
    }
    /**
     * @param texts - Array of strings
     * @returns Promise resolving to one vector per text
     */
    async embed(texts) {
        return texts.map((text) => this.embedSingle(text));
    }
    /**
     * Embed one string synchronously.
     *
     * @param text - String to embed
     * @returns Unit-length vector, or all zeros when the text is shorter
     *          than one n-gram
     */
    embedSingle(text) {
        const size = this.ngramSize;
        const buckets = new Array(this.dimensions).fill(0);
        const cleaned = text.toLowerCase().replace(/[^a-z0-9]/g, ' ');
        const lastStart = cleaned.length - size;
        for (let start = 0; start <= lastStart; start += 1) {
            const signedHash = this.hashNgram(cleaned.slice(start, start + size));
            // Magnitude picks the slot, sign picks the vote direction.
            const slot = Math.abs(signedHash) % this.dimensions;
            buckets[slot] += signedHash > 0 ? 1 : -1;
        }
        let sumSquares = 0;
        for (const value of buckets) {
            sumSquares += value * value;
        }
        const scale = Math.sqrt(sumSquares) || 1;
        return buckets.map((value) => value / scale);
    }
    /**
     * 32-bit rolling hash (acc * 31 + code unit, wrapping) of an n-gram.
     *
     * @param ngram - Window of characters
     * @returns Signed 32-bit integer
     */
    hashNgram(ngram) {
        let acc = 0;
        let pos = 0;
        while (pos < ngram.length) {
            acc = (Math.imul(31, acc) + ngram.charCodeAt(pos)) | 0;
            pos += 1;
        }
        return acc;
    }
    /**
     * @returns The vector length configured at construction
     */
    getDimensions() {
        return this.dimensions;
    }
}
|
|
92
|
+
exports.LocalNGramProvider = LocalNGramProvider;
|
|
93
|
+
/**
 * Embedding service with provider registry, caching and batching.
 *
 * Providers are objects exposing `name`, `embed(texts)` (async, one vector
 * per text) and `getDimensions()`. Results are cached per provider and text.
 */
class EmbeddingService {
    /**
     * @param config - Optional settings:
     *   defaultProvider (default 'local-ngram'), maxCacheSize (default 10000),
     *   cacheTtl in milliseconds (default 3600000), batchSize (default 32)
     */
    constructor(config = {}) {
        this.providers = new Map();
        this.cache = new Map();
        // Lookup counters backing getCacheStats().hitRate.
        this.cacheHits = 0;
        this.cacheMisses = 0;
        this.config = {
            defaultProvider: config.defaultProvider ?? 'local-ngram',
            maxCacheSize: config.maxCacheSize ?? 10000,
            cacheTtl: config.cacheTtl ?? 3600000, // 1 hour
            batchSize: config.batchSize ?? 32,
        };
        // Register default providers
        this.registerProvider(new LocalNGramProvider());
        this.registerProvider(new MockEmbeddingProvider());
    }
    /**
     * Register an embedding provider under `provider.name`, replacing any
     * provider previously registered with that name.
     */
    registerProvider(provider) {
        this.providers.set(provider.name, provider);
    }
    /**
     * Get a registered provider.
     *
     * @param name - Provider name (uses the configured default if nullish)
     * @returns The provider instance
     * @throws Error when no provider is registered under that name
     */
    getProvider(name) {
        const providerName = name ?? this.config.defaultProvider;
        const provider = this.providers.get(providerName);
        if (!provider) {
            throw new Error(`Provider not found: ${providerName}`);
        }
        return provider;
    }
    /**
     * Generate embeddings for texts with caching.
     *
     * Each distinct uncached text is sent to the provider once, in batches of
     * at most `config.batchSize` (values below 1 are treated as 1).
     *
     * @param texts - Texts to embed
     * @param provider - Provider name (uses default if not specified)
     * @returns Array of embeddings, aligned with `texts`
     * @throws Error when the provider is unknown or returns a different
     *         number of embeddings than texts it was given
     */
    async embed(texts, provider) {
        const providerInstance = this.getProvider(provider);
        const providerName = providerInstance.name;
        const now = Date.now();
        const results = new Array(texts.length).fill(null);
        // Distinct uncached text -> every position in `texts` waiting for it.
        const pending = new Map();
        for (let i = 0; i < texts.length; i++) {
            const text = texts[i];
            const waiting = pending.get(text);
            if (waiting) {
                waiting.push(i);
                continue;
            }
            const cached = this.cache.get(`${providerName}:${hashText(text)}`);
            // The key is only a 32-bit hash, so confirm the entry really
            // belongs to this text before trusting it. Entries stored without
            // a text (three-argument addToCache) keep the key-only behaviour.
            const usable = cached !== undefined
                && (cached.text === undefined || cached.text === text)
                && now - cached.timestamp < this.config.cacheTtl;
            if (usable) {
                results[i] = cached.embedding;
                cached.hits++;
                this.cacheHits++;
            }
            else {
                pending.set(text, [i]);
                this.cacheMisses++;
            }
        }
        if (pending.size === 0) {
            return results;
        }
        const uncachedTexts = [...pending.keys()];
        // A step below 1 (or NaN) would keep the loop from ever advancing.
        const requestedBatch = Math.floor(this.config.batchSize);
        const batchSize = requestedBatch >= 1 ? requestedBatch : 1;
        for (let start = 0; start < uncachedTexts.length; start += batchSize) {
            const batch = uncachedTexts.slice(start, start + batchSize);
            const embeddings = await providerInstance.embed(batch);
            const received = embeddings?.length;
            if (received !== batch.length) {
                throw new Error(`Provider "${providerName}" returned ${received} embeddings for ${batch.length} texts`);
            }
            batch.forEach((text, j) => {
                const embedding = embeddings[j];
                for (const index of pending.get(text)) {
                    results[index] = embedding;
                }
                this.addToCache(`${providerName}:${hashText(text)}`, embedding, now, text);
            });
        }
        return results;
    }
    /**
     * Generate a single embedding
     */
    async embedOne(text, provider) {
        const results = await this.embed([text], provider);
        return results[0];
    }
    /**
     * Add an entry to the cache, making room first when it is full.
     *
     * Eviction policy: entries older than `config.cacheTtl` (relative to
     * `timestamp`) are dropped; if the cache is still full, the entry with
     * the fewest hits is removed, oldest first on ties. Overwriting an
     * existing key never evicts another entry.
     *
     * @param key - Cache key
     * @param embedding - Vector to store
     * @param timestamp - Creation time in milliseconds
     * @param text - Optional source text, used by embed() to detect key
     *               collisions
     */
    addToCache(key, embedding, timestamp, text) {
        if (!this.cache.has(key) && this.cache.size >= this.config.maxCacheSize) {
            let victimKey;
            let victimTime = Infinity;
            let victimHits = Infinity;
            // Deleting from a Map while iterating it is well-defined.
            for (const [k, v] of this.cache.entries()) {
                if (timestamp - v.timestamp >= this.config.cacheTtl) {
                    this.cache.delete(k);
                    continue;
                }
                if (v.hits < victimHits || (v.hits === victimHits && v.timestamp < victimTime)) {
                    victimKey = k;
                    victimTime = v.timestamp;
                    victimHits = v.hits;
                }
            }
            if (victimKey !== undefined && this.cache.size >= this.config.maxCacheSize) {
                this.cache.delete(victimKey);
            }
        }
        this.cache.set(key, { embedding, timestamp, hits: 0, text });
    }
    /**
     * Compute cosine similarity between two embeddings.
     *
     * @returns Value in [-1, 1]; 0 when either vector has zero length
     * @throws Error when the vectors differ in length
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new Error('Embeddings must have same dimensions');
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom === 0 ? 0 : dotProduct / denom;
    }
    /**
     * Find most similar texts from a corpus.
     *
     * @param query - Text to compare against
     * @param corpus - Candidate texts
     * @param k - Maximum number of results (default 5; negative means none)
     * @param provider - Provider name (uses default if not specified)
     * @returns Up to k `{ text, similarity, index }` records, best first
     */
    async findSimilar(query, corpus, k = 5, provider) {
        const [queryEmbed, ...corpusEmbeds] = await this.embed([query, ...corpus], provider);
        const results = corpusEmbeds.map((embed, i) => ({
            text: corpus[i],
            similarity: this.cosineSimilarity(queryEmbed, embed),
            index: i,
        }));
        return results
            .sort((a, b) => b.similarity - a.similarity)
            // A negative k would otherwise make slice() count from the end.
            .slice(0, Math.max(0, k));
    }
    /**
     * Get cache statistics.
     *
     * @returns `size`, `maxSize`, and `hitRate`: the fraction (0-1) of cache
     *          lookups made by embed() since construction or the last
     *          clearCache() that were served from the cache
     */
    getCacheStats() {
        const lookups = this.cacheHits + this.cacheMisses;
        return {
            size: this.cache.size,
            maxSize: this.config.maxCacheSize,
            hitRate: lookups > 0 ? this.cacheHits / lookups : 0,
        };
    }
    /**
     * Clear the cache and reset the hit/miss counters
     */
    clearCache() {
        this.cache.clear();
        this.cacheHits = 0;
        this.cacheMisses = 0;
    }
    /**
     * Get embedding dimensions for a provider
     */
    getDimensions(provider) {
        return this.getProvider(provider).getDimensions();
    }
    /**
     * List the names of the registered providers
     */
    listProviders() {
        return Array.from(this.providers.keys());
    }
}
|
|
270
|
+
exports.EmbeddingService = EmbeddingService;
|
|
271
|
+
/**
 * Factory wrapper around the EmbeddingService constructor.
 *
 * @param config - Forwarded unchanged to the constructor
 * @returns A new, independent service instance
 */
function createEmbeddingService(config) {
    const service = new EmbeddingService(config);
    return service;
}
|
|
277
|
+
// Module-wide service, created on first request
let defaultService = null;
/**
 * Return the shared EmbeddingService, constructing it with default
 * settings the first time this function is called.
 *
 * @returns The same instance on every call
 */
function getDefaultEmbeddingService() {
    if (defaultService) {
        return defaultService;
    }
    defaultService = new EmbeddingService();
    return defaultService;
}
|
|
288
|
+
exports.default = {
|
|
289
|
+
EmbeddingService,
|
|
290
|
+
LocalNGramProvider,
|
|
291
|
+
MockEmbeddingProvider,
|
|
292
|
+
createEmbeddingService,
|
|
293
|
+
getDefaultEmbeddingService,
|
|
294
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/services/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Services module exports
|
|
4
|
+
*/
|
|
5
|
+
// Compiler-emitted interop helpers. Each reuses a definition already present
// on `this` when there is one.

// Define `o[k2]` (k2 defaults to k) so that it reflects `m[k]`. With
// Object.create available this is a property definition: an existing
// accessor on an ES-module source is reused as-is, otherwise a live getter
// is installed. Without it, the current value is copied.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        const exportedName = k2 === undefined ? k : k2;
        let descriptor = Object.getOwnPropertyDescriptor(m, k);
        const needsGetter = !descriptor
            || ("get" in descriptor ? !m.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, exportedName, descriptor);
    }
    : function (o, m, k, k2) {
        o[k2 === undefined ? k : k2] = m[k];
    });
// Bind every enumerable property of `m` onto `exports`, skipping "default"
// and names `exports` already owns.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    const alreadyExported = function (name) {
        return Object.prototype.hasOwnProperty.call(exports, name);
    };
    for (var name in m) {
        if (name === "default" || alreadyExported(name)) {
            continue;
        }
        __createBinding(exports, m, name);
    }
};
// Return ES-module objects untouched; wrap anything else as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
22
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
+
exports.embeddingService = void 0;
|
|
24
|
+
__exportStar(require("./embedding-service"), exports);
|
|
25
|
+
var embedding_service_1 = require("./embedding-service");
|
|
26
|
+
Object.defineProperty(exports, "embeddingService", { enumerable: true, get: function () { return __importDefault(embedding_service_1).default; } });
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector entry representing a document with its embedding
|
|
3
|
+
*/
|
|
4
|
+
export interface VectorEntry {
|
|
5
|
+
/** Unique identifier for the vector */
|
|
6
|
+
id: string;
|
|
7
|
+
/** Vector embedding (array of floats) */
|
|
8
|
+
vector: number[];
|
|
9
|
+
/** Optional metadata associated with the vector */
|
|
10
|
+
metadata?: Record<string, any>;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Search query parameters
|
|
14
|
+
*/
|
|
15
|
+
export interface SearchQuery {
|
|
16
|
+
/** Query vector to search for */
|
|
17
|
+
vector: number[];
|
|
18
|
+
/** Number of results to return */
|
|
19
|
+
k?: number;
|
|
20
|
+
/** Optional metadata filters */
|
|
21
|
+
filter?: Record<string, any>;
|
|
22
|
+
/** Minimum similarity threshold (0-1) */
|
|
23
|
+
threshold?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Search result containing matched vector and similarity score
|
|
27
|
+
*/
|
|
28
|
+
export interface SearchResult {
|
|
29
|
+
/** ID of the matched vector */
|
|
30
|
+
id: string;
|
|
31
|
+
/** Similarity score (0-1, higher is better) */
|
|
32
|
+
score: number;
|
|
33
|
+
/** Vector data */
|
|
34
|
+
vector: number[];
|
|
35
|
+
/** Associated metadata */
|
|
36
|
+
metadata?: Record<string, any>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Database configuration options
|
|
40
|
+
*/
|
|
41
|
+
export interface DbOptions {
|
|
42
|
+
/** Vector dimension size */
|
|
43
|
+
dimension: number;
|
|
44
|
+
/** Distance metric to use */
|
|
45
|
+
metric?: 'cosine' | 'euclidean' | 'dot';
|
|
46
|
+
/** Path to persist database */
|
|
47
|
+
path?: string;
|
|
48
|
+
/** Enable auto-persistence */
|
|
49
|
+
autoPersist?: boolean;
|
|
50
|
+
/** HNSW index parameters */
|
|
51
|
+
hnsw?: {
|
|
52
|
+
/** Maximum number of connections per layer */
|
|
53
|
+
m?: number;
|
|
54
|
+
/** Size of the dynamic candidate list used while building the index */
|
|
55
|
+
efConstruction?: number;
|
|
56
|
+
/** Size of the dynamic candidate list for search */
|
|
57
|
+
efSearch?: number;
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Database statistics
|
|
62
|
+
*/
|
|
63
|
+
export interface DbStats {
|
|
64
|
+
/** Total number of vectors */
|
|
65
|
+
count: number;
|
|
66
|
+
/** Vector dimension */
|
|
67
|
+
dimension: number;
|
|
68
|
+
/** Distance metric */
|
|
69
|
+
metric: string;
|
|
70
|
+
/** Memory usage in bytes */
|
|
71
|
+
memoryUsage?: number;
|
|
72
|
+
/** Index type */
|
|
73
|
+
indexType?: string;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Main VectorDB class interface
|
|
77
|
+
*/
|
|
78
|
+
export interface VectorDB {
|
|
79
|
+
/**
|
|
80
|
+
* Create a new vector database
|
|
81
|
+
* @param options Database configuration
|
|
82
|
+
*/
|
|
83
|
+
new (options: DbOptions): VectorDB;
|
|
84
|
+
/**
|
|
85
|
+
* Insert a single vector
|
|
86
|
+
* @param entry Vector entry to insert
|
|
87
|
+
*/
|
|
88
|
+
insert(entry: VectorEntry): void;
|
|
89
|
+
/**
|
|
90
|
+
* Insert multiple vectors in batch
|
|
91
|
+
* @param entries Array of vector entries
|
|
92
|
+
*/
|
|
93
|
+
insertBatch(entries: VectorEntry[]): void;
|
|
94
|
+
/**
|
|
95
|
+
* Search for similar vectors
|
|
96
|
+
* @param query Search query parameters
|
|
97
|
+
* @returns Array of search results
|
|
98
|
+
*/
|
|
99
|
+
search(query: SearchQuery): SearchResult[];
|
|
100
|
+
/**
|
|
101
|
+
* Get vector by ID
|
|
102
|
+
* @param id Vector ID
|
|
103
|
+
* @returns Vector entry or null
|
|
104
|
+
*/
|
|
105
|
+
get(id: string): VectorEntry | null;
|
|
106
|
+
/**
|
|
107
|
+
* Delete vector by ID
|
|
108
|
+
* @param id Vector ID
|
|
109
|
+
* @returns true if deleted, false if not found
|
|
110
|
+
*/
|
|
111
|
+
delete(id: string): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* Update vector metadata
|
|
114
|
+
* @param id Vector ID
|
|
115
|
+
* @param metadata New metadata
|
|
116
|
+
*/
|
|
117
|
+
updateMetadata(id: string, metadata: Record<string, any>): void;
|
|
118
|
+
/**
|
|
119
|
+
* Get database statistics
|
|
120
|
+
*/
|
|
121
|
+
stats(): DbStats;
|
|
122
|
+
/**
|
|
123
|
+
* Save database to disk
|
|
124
|
+
* @param path Optional path (uses configured path if not provided)
|
|
125
|
+
*/
|
|
126
|
+
save(path?: string): void;
|
|
127
|
+
/**
|
|
128
|
+
* Load database from disk
|
|
129
|
+
* @param path Path to database file
|
|
130
|
+
*/
|
|
131
|
+
load(path: string): void;
|
|
132
|
+
/**
|
|
133
|
+
* Clear all vectors from database
|
|
134
|
+
*/
|
|
135
|
+
clear(): void;
|
|
136
|
+
/**
|
|
137
|
+
* Build HNSW index for faster search
|
|
138
|
+
*/
|
|
139
|
+
buildIndex(): void;
|
|
140
|
+
/**
|
|
141
|
+
* Optimize database (rebuild indices, compact storage)
|
|
142
|
+
*/
|
|
143
|
+
optimize(): void;
|
|
144
|
+
}
|
|
145
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,uCAAuC;IACvC,EAAE,EAAE,MAAM,CAAC;IACX,yCAAyC;IACzC,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iCAAiC;IACjC,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,kCAAkC;IAClC,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,gCAAgC;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC7B,yCAAyC;IACzC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,+BAA+B;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,+CAA+C;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,kBAAkB;IAClB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,0BAA0B;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4BAA4B;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,CAAC;IACxC,+BAA+B;IAC/B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8BAA8B;IAC9B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,4BAA4B;IAC5B,IAAI,CAAC,EAAE;QACL,8CAA8C;QAC9C,CAAC,CAAC,EAAE,MAAM,CAAC;QACX,yCAAyC;QACzC,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,oDAAoD;QACpD,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,4BAA4B;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,iBAAiB;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB;;;OAGG;IACH,KAAI,OAAO,EAAE,SAAS,GAAG,QAAQ,CAAC;IAElC;;;OAGG;IACH,MAAM,CAAC,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC;IAEjC;;;OAGG;IACH,WAAW,CAAC,OAAO,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC;IAE1C;;;;OAIG;IACH,MAAM,CAAC,KAAK,EAAE,WAAW,GAAG,YAAY,EAAE,CAAC;IAE3C;;;;OAIG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,WAAW,GAAG,IAAI,CAAC;IAEpC;;;;OAIG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC;IAE5B;;;;OAIG;IACH,cAAc,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI,CAAC;IAEhE;;OAEG;IACH,KAAK,IAAI,OAAO,CAAC;IAEjB;;;OAGG;IACH,IAAI,CAAC,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B
;;;OAGG;IACH,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB;;OAEG;IACH,KAAK,IAAI,IAAI,CAAC;IAEd;;OAEG;IACH,UAAU,IAAI,IAAI,CAAC;IAEnB;;OAEG;IACH,QAAQ,IAAI,IAAI,CAAC;CAClB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* ruvector API Usage Examples
|
|
5
|
+
*
|
|
6
|
+
* This demonstrates how to use ruvector in your Node.js applications
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// For this demo, we use the mock implementation
|
|
10
|
+
// In production, you would use: const { VectorDB } = require('ruvector');
|
|
11
|
+
const { VectorDB } = require('../test/mock-implementation.js');
|
|
12
|
+
|
|
13
|
+
console.log('ruvector API Examples\n');
|
|
14
|
+
console.log('='.repeat(60));
|
|
15
|
+
|
|
16
|
+
// Show info
|
|
17
|
+
console.log('\nUsing: Mock implementation (for demo purposes)');
|
|
18
|
+
console.log('In production: npm install ruvector\n');
|
|
19
|
+
|
|
20
|
+
// Example 1: Basic usage
|
|
21
|
+
console.log('Example 1: Basic Vector Operations');
|
|
22
|
+
console.log('-'.repeat(60));
|
|
23
|
+
|
|
24
|
+
const db = new VectorDB({
|
|
25
|
+
dimension: 3,
|
|
26
|
+
metric: 'cosine'
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
// Insert some vectors
|
|
30
|
+
db.insert({
|
|
31
|
+
id: 'doc1',
|
|
32
|
+
vector: [1, 0, 0],
|
|
33
|
+
metadata: { title: 'First Document', category: 'A' }
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
db.insertBatch([
|
|
37
|
+
{ id: 'doc2', vector: [0, 1, 0], metadata: { title: 'Second Document', category: 'B' } },
|
|
38
|
+
{ id: 'doc3', vector: [0, 0, 1], metadata: { title: 'Third Document', category: 'C' } },
|
|
39
|
+
{ id: 'doc4', vector: [0.7, 0.7, 0], metadata: { title: 'Fourth Document', category: 'A' } }
|
|
40
|
+
]);
|
|
41
|
+
|
|
42
|
+
console.log('✓ Inserted 4 vectors');
|
|
43
|
+
|
|
44
|
+
// Get stats
|
|
45
|
+
const stats = db.stats();
|
|
46
|
+
console.log(`✓ Database has ${stats.count} vectors, dimension ${stats.dimension}`);
|
|
47
|
+
|
|
48
|
+
// Search
|
|
49
|
+
const results = db.search({
|
|
50
|
+
vector: [1, 0, 0],
|
|
51
|
+
k: 3
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
console.log(`✓ Search returned ${results.length} results:`);
|
|
55
|
+
results.forEach((result, i) => {
|
|
56
|
+
console.log(` ${i + 1}. ${result.id} (score: ${result.score.toFixed(4)}) - ${result.metadata.title}`);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// Get by ID
|
|
60
|
+
const doc = db.get('doc2');
|
|
61
|
+
console.log(`✓ Retrieved document: ${doc.metadata.title}`);
|
|
62
|
+
|
|
63
|
+
// Update metadata
|
|
64
|
+
db.updateMetadata('doc1', { updated: true, timestamp: Date.now() });
|
|
65
|
+
console.log('✓ Updated metadata');
|
|
66
|
+
|
|
67
|
+
// Delete
|
|
68
|
+
db.delete('doc3');
|
|
69
|
+
console.log('✓ Deleted doc3');
|
|
70
|
+
console.log(`✓ Database now has ${db.stats().count} vectors\n`);
|
|
71
|
+
|
|
72
|
+
// Example 2: Semantic Search Simulation
|
|
73
|
+
console.log('Example 2: Semantic Search Simulation');
|
|
74
|
+
console.log('-'.repeat(60));
|
|
75
|
+
|
|
76
|
+
const semanticDb = new VectorDB({
|
|
77
|
+
dimension: 5,
|
|
78
|
+
metric: 'cosine'
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
// Simulate document embeddings
|
|
82
|
+
const documents = [
|
|
83
|
+
{ id: 'machine-learning', vector: [0.9, 0.8, 0.1, 0.2, 0.1], metadata: { title: 'Introduction to Machine Learning', topic: 'AI' } },
|
|
84
|
+
{ id: 'deep-learning', vector: [0.85, 0.9, 0.15, 0.25, 0.1], metadata: { title: 'Deep Learning Fundamentals', topic: 'AI' } },
|
|
85
|
+
{ id: 'web-dev', vector: [0.1, 0.2, 0.9, 0.8, 0.1], metadata: { title: 'Web Development Guide', topic: 'Web' } },
|
|
86
|
+
{ id: 'react', vector: [0.15, 0.2, 0.85, 0.9, 0.1], metadata: { title: 'React Tutorial', topic: 'Web' } },
|
|
87
|
+
{ id: 'database', vector: [0.2, 0.3, 0.3, 0.4, 0.9], metadata: { title: 'Database Design', topic: 'Data' } }
|
|
88
|
+
];
|
|
89
|
+
|
|
90
|
+
semanticDb.insertBatch(documents);
|
|
91
|
+
console.log(`✓ Indexed ${documents.length} documents`);
|
|
92
|
+
|
|
93
|
+
// Search for AI-related content
|
|
94
|
+
const aiQuery = [0.9, 0.85, 0.1, 0.2, 0.1];
|
|
95
|
+
const aiResults = semanticDb.search({ vector: aiQuery, k: 2 });
|
|
96
|
+
|
|
97
|
+
console.log('\nQuery: AI-related content');
|
|
98
|
+
console.log('Results:');
|
|
99
|
+
aiResults.forEach((result, i) => {
|
|
100
|
+
console.log(` ${i + 1}. ${result.metadata.title} (score: ${result.score.toFixed(4)})`);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Search for Web-related content
|
|
104
|
+
const webQuery = [0.1, 0.2, 0.9, 0.85, 0.1];
|
|
105
|
+
const webResults = semanticDb.search({ vector: webQuery, k: 2 });
|
|
106
|
+
|
|
107
|
+
console.log('\nQuery: Web-related content');
|
|
108
|
+
console.log('Results:');
|
|
109
|
+
webResults.forEach((result, i) => {
|
|
110
|
+
console.log(` ${i + 1}. ${result.metadata.title} (score: ${result.score.toFixed(4)})`);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
// Example 3: Different Distance Metrics
|
|
114
|
+
console.log('\n\nExample 3: Distance Metrics Comparison');
|
|
115
|
+
console.log('-'.repeat(60));
|
|
116
|
+
|
|
117
|
+
const metrics = ['cosine', 'euclidean', 'dot'];
|
|
118
|
+
const testVectors = [
|
|
119
|
+
{ id: 'v1', vector: [1, 0, 0] },
|
|
120
|
+
{ id: 'v2', vector: [0.7, 0.7, 0] },
|
|
121
|
+
{ id: 'v3', vector: [0, 1, 0] }
|
|
122
|
+
];
|
|
123
|
+
|
|
124
|
+
metrics.forEach(metric => {
|
|
125
|
+
const metricDb = new VectorDB({ dimension: 3, metric });
|
|
126
|
+
metricDb.insertBatch(testVectors);
|
|
127
|
+
|
|
128
|
+
const results = metricDb.search({ vector: [1, 0, 0], k: 3 });
|
|
129
|
+
|
|
130
|
+
console.log(`\n${metric.toUpperCase()} metric:`);
|
|
131
|
+
results.forEach((result, i) => {
|
|
132
|
+
console.log(` ${i + 1}. ${result.id}: ${result.score.toFixed(4)}`);
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
// Example 4: Batch Operations Performance
// Times one bulk insert and a run of random searches, then prints throughput.
console.log('\n\nExample 4: Batch Operations Performance');
console.log('-'.repeat(60));

// Date.now() has millisecond resolution, so a fast run can measure 0 ms.
// Report a lower bound in that case instead of printing "Infinity".
const formatRate = (count, elapsedMs) => (elapsedMs > 0
  ? String(Math.round(count / (elapsedMs / 1000)))
  : `>${count * 1000}`);

const perfDb = new VectorDB({ dimension: 128, metric: 'cosine' });

// Generate random vectors
const numVectors = 1000;
const vectors = [];
for (let i = 0; i < numVectors; i++) {
  vectors.push({
    id: `vec_${i}`,
    vector: Array.from({ length: 128 }, () => Math.random()),
    metadata: { index: i, batch: Math.floor(i / 100) }
  });
}

console.log(`Inserting ${numVectors} vectors...`);
const insertStart = Date.now();
perfDb.insertBatch(vectors);
const insertTime = Date.now() - insertStart;

console.log(`✓ Inserted ${numVectors} vectors in ${insertTime}ms`);
console.log(`✓ Rate: ${formatRate(numVectors, insertTime)} vectors/sec`);

// Search performance
const numQueries = 100;
console.log(`\nRunning ${numQueries} searches...`);
const searchStart = Date.now();

for (let i = 0; i < numQueries; i++) {
  const query = {
    vector: Array.from({ length: 128 }, () => Math.random()),
    k: 10
  };
  perfDb.search(query);
}

const searchTime = Date.now() - searchStart;
console.log(`✓ Completed ${numQueries} searches in ${searchTime}ms`);
console.log(`✓ Rate: ${formatRate(numQueries, searchTime)} queries/sec`);
console.log(`✓ Avg latency: ${(searchTime / numQueries).toFixed(2)}ms`);
|
|
178
|
+
|
|
179
|
+
// Example 5: Persistence (conceptual, would need real implementation)
|
|
180
|
+
console.log('\n\nExample 5: Persistence');
|
|
181
|
+
console.log('-'.repeat(60));
|
|
182
|
+
|
|
183
|
+
const persistDb = new VectorDB({
|
|
184
|
+
dimension: 3,
|
|
185
|
+
metric: 'cosine',
|
|
186
|
+
path: './my-vectors.db',
|
|
187
|
+
autoPersist: true
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
persistDb.insertBatch([
|
|
191
|
+
{ id: 'p1', vector: [1, 0, 0], metadata: { name: 'First' } },
|
|
192
|
+
{ id: 'p2', vector: [0, 1, 0], metadata: { name: 'Second' } }
|
|
193
|
+
]);
|
|
194
|
+
|
|
195
|
+
console.log('✓ Created database with auto-persist enabled');
|
|
196
|
+
console.log('✓ Insert operations will automatically save to disk');
|
|
197
|
+
console.log('✓ Use db.save(path) for manual saves');
|
|
198
|
+
console.log('✓ Use db.load(path) to restore from disk');
|
|
199
|
+
|
|
200
|
+
// Summary
// The feature list names only what this script actually exercised, and the
// closing pointer refers to the package's own examples/ directory rather
// than a path on the author's machine.
console.log(`\n${'='.repeat(60)}`);
console.log('\n✅ All examples completed successfully!');
console.log('\nKey Features Demonstrated:');
console.log(' • Basic CRUD operations (insert, search, get, update, delete)');
console.log(' • Batch operations for better performance');
console.log(' • Multiple distance metrics (cosine, euclidean, dot)');
console.log(' • Semantic search simulation');
console.log(' • Performance benchmarking');
console.log(' • Metadata updates');
console.log(' • Persistence (save/load)');
console.log('\nFor more examples, see the examples/ directory of the ruvector package');
|