ruvector 0.1.26 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Embedding Service - Unified embedding generation and management
3
+ *
4
+ * This service provides a unified interface for generating, caching, and
5
+ * managing embeddings from various sources (local models, APIs, etc.)
6
+ */
7
+ /**
8
+ * Embedding provider interface
+ *
+ * Contract implemented by every embedding backend handed to
+ * EmbeddingService.registerProvider(); MockEmbeddingProvider and
+ * LocalNGramProvider in this file both implement it.
9
+ */
10
+ export interface EmbeddingProvider {
11
+ /** Provider name (EmbeddingService stores and looks up providers under this name) */
12
+ name: string;
13
+ /** Generate embeddings for texts; EmbeddingService maps result j back to input text j */
14
+ embed(texts: string[]): Promise<number[][]>;
15
+ /** Get embedding dimensions (for the bundled providers, the length of each vector from embed()) */
16
+ getDimensions(): number;
17
+ }
18
+ /**
19
+ * Embedding service configuration
+ *
+ * Every field is optional; the EmbeddingService constructor fills in the
+ * default noted on each field.
20
+ */
21
+ export interface EmbeddingServiceConfig {
22
+ /** Default provider to use when a call does not name one (default: 'local-ngram') */
23
+ defaultProvider?: string;
24
+ /** Maximum cache size, counted in cached embeddings (default: 10000) */
25
+ maxCacheSize?: number;
26
+ /** Cache TTL in milliseconds (default: 3600000, i.e. 1 hour) */
27
+ cacheTtl?: number;
28
+ /** Batch size for embedding generation: texts handed to the provider per embed() call (default: 32) */
29
+ batchSize?: number;
30
+ }
31
+ /**
32
+ * Mock embedding provider for testing
+ *
+ * Produces deterministic pseudo-random vectors seeded from a hash of each
+ * text and L2-normalizes them, so equal texts always get equal embeddings.
33
+ */
34
+ export declare class MockEmbeddingProvider implements EmbeddingProvider {
35
+ /** Set to 'mock' by the constructor */ name: string;
36
+ /** Vector length reported by getDimensions() */ private dimensions;
37
+ /** @param dimensions - Vector length (the implementation defaults to 384) */ constructor(dimensions?: number);
38
+ /** Resolves to one normalized vector per input text */ embed(texts: string[]): Promise<number[][]>;
39
+ /** Returns the vector length given to the constructor */ getDimensions(): number;
40
+ }
41
+ /**
42
+ * Simple local embedding using character n-grams
43
+ * This is a fallback when no external provider is available
+ *
+ * Each character n-gram of the lower-cased text is hashed into one of
+ * `dimensions` buckets and the resulting vector is L2-normalized.
44
+ */
45
+ export declare class LocalNGramProvider implements EmbeddingProvider {
46
+ /** Set to 'local-ngram' by the constructor */ name: string;
47
+ /** Vector length reported by getDimensions() */ private dimensions;
48
+ /** Number of characters per n-gram */ private ngramSize;
49
+ /** @param dimensions - Vector length (implementation default 256); @param ngramSize - Characters per n-gram (implementation default 3) */ constructor(dimensions?: number, ngramSize?: number);
50
+ /** Resolves to one vector per input text, each built by embedSingle */ embed(texts: string[]): Promise<number[][]>;
51
+ /** Builds the normalized n-gram vector for a single text */ private embedSingle;
52
+ /** 32-bit rolling hash of one n-gram; its sign and magnitude pick the bucket update */ private hashNgram;
53
+ /** Returns the vector length given to the constructor */ getDimensions(): number;
54
+ }
55
+ /**
56
+ * Embedding service with caching and batching
+ *
+ * Holds a registry of named providers and an in-memory embedding cache.
+ * The constructor registers a LocalNGramProvider and a MockEmbeddingProvider.
57
+ */
58
+ export declare class EmbeddingService {
59
+ /** Map of provider name to provider */ private providers;
60
+ /** Map of cache key (provider name plus text hash) to cached embedding entry */ private cache;
61
+ /** Configuration with defaults already applied */ private config;
62
+ /** @param config - Optional overrides; see EmbeddingServiceConfig for the defaults */ constructor(config?: EmbeddingServiceConfig);
63
+ /**
64
+ * Register an embedding provider
+ *
+ * The provider is stored under `provider.name`; registering a second
+ * provider with the same name replaces the first.
65
+ */
66
+ registerProvider(provider: EmbeddingProvider): void;
67
+ /**
68
+ * Get a registered provider
+ *
+ * @param name - Provider name (uses the configured default if not specified)
+ * @throws Error when no provider is registered under the resolved name
69
+ */
70
+ getProvider(name?: string): EmbeddingProvider;
71
+ /**
72
+ * Generate embeddings for texts with caching
73
+ *
+ * Texts with a fresh cache entry are served from the cache; the rest are
+ * sent to the provider in batches of the configured batch size.
+ *
74
+ * @param texts - Texts to embed
75
+ * @param provider - Provider name (uses default if not specified)
76
+ * @returns Array of embeddings, one per input text in input order
77
+ */
78
+ embed(texts: string[], provider?: string): Promise<number[][]>;
79
+ /**
80
+ * Generate a single embedding (the first result of embed([text], provider))
81
+ */
82
+ embedOne(text: string, provider?: string): Promise<number[]>;
83
+ /**
84
+ * Add entry to cache, evicting one entry first when the cache is full
+ *
+ * Despite the historical "LRU" wording, the implementation removes the
+ * entry with the fewest hits, breaking ties by oldest timestamp.
85
+ */
86
+ private addToCache;
87
+ /**
88
+ * Compute cosine similarity between two embeddings
+ *
+ * Returns 0 when either vector has zero norm.
+ *
+ * @throws Error when the two arrays differ in length
89
+ */
90
+ cosineSimilarity(a: number[], b: number[]): number;
91
+ /**
92
+ * Find most similar texts from a corpus
+ *
+ * Embeds the query together with the corpus and resolves to the `k`
+ * best matches (implementation default 5), highest similarity first;
+ * `index` is the position of the text in `corpus`.
93
+ */
94
+ findSimilar(query: string, corpus: string[], k?: number, provider?: string): Promise<{
95
+ text: string;
96
+ similarity: number;
97
+ index: number;
98
+ }[]>;
99
+ /**
100
+ * Get cache statistics
+ *
+ * `hitRate` is computed as total hits divided by the number of cached
+ * entries (average hits per entry), not as hits / lookups.
101
+ */
102
+ getCacheStats(): {
103
+ size: number;
104
+ maxSize: number;
105
+ hitRate: number;
106
+ };
107
+ /**
108
+ * Clear the cache (removes every cached embedding; providers stay registered)
109
+ */
110
+ clearCache(): void;
111
+ /**
112
+ * Get embedding dimensions for a provider (default provider if none is named)
113
+ */
114
+ getDimensions(provider?: string): number;
115
+ /**
116
+ * List available providers (the names they were registered under)
117
+ */
118
+ listProviders(): string[];
119
+ }
120
+ /**
121
+ * Create an embedding service instance
+ *
+ * Each call returns a new EmbeddingService constructed from `config`.
122
+ */
123
+ export declare function createEmbeddingService(config?: EmbeddingServiceConfig): EmbeddingService;
124
+ /**
124
+ * Get the default embedding service instance
+ *
+ * The instance is created with default configuration on the first call
+ * and the same object is returned on every later call.
125
+ */
126
+ export declare function getDefaultEmbeddingService(): EmbeddingService;
128
+ /** Shape of the default export: the three classes and two factory functions of this module bundled in one object */ declare const _default: {
128
+ EmbeddingService: typeof EmbeddingService;
129
+ LocalNGramProvider: typeof LocalNGramProvider;
130
+ MockEmbeddingProvider: typeof MockEmbeddingProvider;
131
+ createEmbeddingService: typeof createEmbeddingService;
132
+ getDefaultEmbeddingService: typeof getDefaultEmbeddingService;
133
+ };
134
+ export default _default;
136
+ //# sourceMappingURL=embedding-service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-service.d.ts","sourceRoot":"","sources":["../../src/services/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,+BAA+B;IAC/B,aAAa,IAAI,MAAM,CAAC;CACzB;AAWD;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,8BAA8B;IAC9B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gCAAgC;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAeD;;GAEG;AACH,qBAAa,qBAAsB,YAAW,iBAAiB;IAC7D,IAAI,SAAU;IACd,OAAO,CAAC,UAAU,CAAS;gBAEf,UAAU,GAAE,MAAY;IAI9B,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAoBjD,aAAa,IAAI,MAAM;CAGxB;AAED;;;GAGG;AACH,qBAAa,kBAAmB,YAAW,iBAAiB;IAC1D,IAAI,SAAiB;IACrB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,SAAS,CAAS;gBAEd,UAAU,GAAE,MAAY,EAAE,SAAS,GAAE,MAAU;IAKrD,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAIjD,OAAO,CAAC,WAAW;IAiBnB,OAAO,CAAC,SAAS;IAQjB,aAAa,IAAI,MAAM;CAGxB;AAED;;GAEG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAA6C;IAC9D,OAAO,CAAC,KAAK,CAAsC;IACnD,OAAO,CAAC,MAAM,CAAmC;gBAErC,MAAM,GAAE,sBAA2B;IAa/C;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI;IAInD;;OAEG;IACH,WAAW,CAAC,IAAI,CAAC,EAAE,MAAM,GAAG,iBAAiB;IAS7C;;;;;;OAMG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAkDpE;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKlE;;OAEG;IACH,OAAO,CAAC,UAAU;IAwBlB;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM;IAmBlD;;OAEG;IACG,WAAW,CACf,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EAAE,EAChB,CAAC,GAAE,MAAU,EACb,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAcjE;;OAEG;IACH,aAAa,IAAI;QACf,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,EAAE,MAAM,CAAC;KACjB;IAaD;;OAEG;IACH,UAAU,IAAI,I
AAI;IAIlB;;OAEG;IACH,aAAa,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM;IAIxC;;OAEG;IACH,aAAa,IAAI,MAAM,EAAE;CAG1B;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,CAAC,EAAE,sBAAsB,GAC9B,gBAAgB,CAElB;AAKD;;GAEG;AACH,wBAAgB,0BAA0B,IAAI,gBAAgB,CAK7D;;;;;;;;AAED,wBAME"}
@@ -0,0 +1,294 @@
1
+ "use strict";
2
+ /**
3
+ * Embedding Service - Unified embedding generation and management
4
+ *
5
+ * This service provides a unified interface for generating, caching, and
6
+ * managing embeddings from various sources (local models, APIs, etc.)
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.EmbeddingService = exports.LocalNGramProvider = exports.MockEmbeddingProvider = void 0;
10
+ exports.createEmbeddingService = createEmbeddingService;
11
+ exports.getDefaultEmbeddingService = getDefaultEmbeddingService;
12
/**
 * Derive the short string used inside cache keys from a text.
 *
 * Folds the UTF-16 code units of `text` into a signed 32-bit rolling hash
 * (previous value times 31, plus the code unit) and renders it in base 36
 * behind an `h` prefix, e.g. `hashText('a') === 'h2p'`. Negative hashes keep
 * their minus sign (`h-...`). Distinct texts can share a hash.
 *
 * @param text - Text to hash
 * @returns `h` followed by the base-36 hash
 */
function hashText(text) {
    let acc = 0;
    let pos = 0;
    while (pos < text.length) {
        // Math.imul(acc, 31) is the 32-bit wrap of (acc << 5) - acc.
        acc = (Math.imul(acc, 31) + text.charCodeAt(pos)) | 0;
        pos += 1;
    }
    return 'h' + acc.toString(36);
}
24
/**
 * Mock embedding provider for testing.
 *
 * Turns each text into a deterministic pseudo-random vector: the text is
 * hashed into a 32-bit seed, a linear congruential step generates one
 * component per dimension, and the vector is L2-normalized.
 */
class MockEmbeddingProvider {
    /**
     * @param dimensions - Length of every generated vector (default 384)
     */
    constructor(dimensions = 384) {
        this.name = 'mock';
        this.dimensions = dimensions;
    }
    /**
     * Generate one deterministic vector per text.
     *
     * @param texts - Texts to embed
     * @returns One normalized vector per input text, in input order
     */
    async embed(texts) {
        return texts.map((text) => {
            // Seed: 32-bit rolling hash (x31) over the text's UTF-16 code units.
            let state = 0;
            for (let pos = 0; pos < text.length; pos += 1) {
                state = ((state << 5) - state + text.charCodeAt(pos)) | 0;
            }
            const raw = [];
            let sumOfSquares = 0;
            for (let d = 0; d < this.dimensions; d += 1) {
                // NOTE: the product is evaluated in double precision before `| 0`
                // (it can exceed 2^53), so swapping in Math.imul would change the
                // generated vectors.
                state = (state * 1103515245 + 12345) | 0;
                const component = (state % 1000) / 1000 - 0.5;
                raw.push(component);
                sumOfSquares += component * component;
            }
            // Normalize; an all-zero vector is left unscaled instead of dividing by 0.
            const divisor = Math.sqrt(sumOfSquares) || 1;
            return raw.map((component) => component / divisor);
        });
    }
    /**
     * @returns The vector length given to the constructor
     */
    getDimensions() {
        return this.dimensions;
    }
}
53
+ exports.MockEmbeddingProvider = MockEmbeddingProvider;
54
/**
 * Simple local embedding using character n-grams.
 * This is a fallback when no external provider is available.
 *
 * Each n-gram of the lower-cased text is hashed; the hash picks a bucket
 * (`|hash| % dimensions`) and its sign decides whether the bucket is
 * incremented or decremented. The resulting vector is L2-normalized.
 */
class LocalNGramProvider {
    /**
     * @param dimensions - Length of every generated vector (default 256)
     * @param ngramSize - Number of characters per n-gram (default 3)
     */
    constructor(dimensions = 256, ngramSize = 3) {
        this.name = 'local-ngram';
        this.dimensions = dimensions;
        this.ngramSize = ngramSize;
    }
    /**
     * Embed every text independently.
     *
     * @param texts - Texts to embed
     * @returns One normalized vector per input text, in input order
     */
    async embed(texts) {
        return texts.map(text => this.embedSingle(text));
    }
    /**
     * Build the normalized n-gram vector for one text.
     *
     * @param text - Text to embed
     * @returns Vector of length `dimensions`; all zeros for an empty text
     */
    embedSingle(text) {
        const embedding = new Array(this.dimensions).fill(0);
        // Keep letters, combining marks and digits of every script; everything
        // else becomes a space. ASCII-only input normalizes exactly as with the
        // former /[^a-z0-9]/ filter, but non-Latin text is no longer blanked out
        // (which used to give all such texts the same embedding).
        const normalized = text.toLowerCase().replace(/[^\p{L}\p{M}\p{N}]/gu, ' ');
        if (normalized.length > 0 && normalized.length < this.ngramSize) {
            // Too short for a full n-gram: hash the whole text as one gram so the
            // result is not an all-zero vector.
            this.accumulate(embedding, normalized);
        }
        else {
            // Generate n-grams and hash them into embedding dimensions
            for (let i = 0; i <= normalized.length - this.ngramSize; i++) {
                this.accumulate(embedding, normalized.slice(i, i + this.ngramSize));
            }
        }
        // Normalize; a zero vector is left unscaled instead of dividing by 0.
        const norm = Math.sqrt(embedding.reduce((s, v) => s + v * v, 0));
        return embedding.map(v => v / (norm || 1));
    }
    /**
     * Add one n-gram's contribution to the bucket chosen by its hash.
     *
     * @param embedding - Vector being built (mutated in place)
     * @param ngram - N-gram to account for
     */
    accumulate(embedding, ngram) {
        const hash = this.hashNgram(ngram);
        const idx = Math.abs(hash) % this.dimensions;
        embedding[idx] += hash > 0 ? 1 : -1;
    }
    /**
     * Signed 32-bit rolling hash (x31) over the n-gram's UTF-16 code units.
     *
     * @param ngram - N-gram to hash
     * @returns Hash in the signed 32-bit integer range
     */
    hashNgram(ngram) {
        let hash = 0;
        for (let i = 0; i < ngram.length; i++) {
            hash = ((hash << 5) - hash + ngram.charCodeAt(i)) | 0;
        }
        return hash;
    }
    /**
     * @returns The vector length given to the constructor
     */
    getDimensions() {
        return this.dimensions;
    }
}
92
+ exports.LocalNGramProvider = LocalNGramProvider;
93
/**
 * Embedding service with caching and batching.
 *
 * Keeps a registry of named providers (a LocalNGramProvider and a
 * MockEmbeddingProvider are registered on construction) and an in-memory
 * cache of embeddings keyed by provider name plus a hash of the text.
 */
class EmbeddingService {
    /**
     * @param config - Optional overrides. Missing fields default to provider
     *   'local-ngram', 10000 cache entries, a 1 hour TTL and batches of 32.
     */
    constructor(config = {}) {
        this.providers = new Map();
        this.cache = new Map();
        const requestedBatchSize = config.batchSize ?? 32;
        this.config = {
            defaultProvider: config.defaultProvider ?? 'local-ngram',
            maxCacheSize: config.maxCacheSize ?? 10000,
            cacheTtl: config.cacheTtl ?? 3600000, // 1 hour
            // A batch size below 1 (or NaN) would keep the batching loop in
            // embed() from advancing, so such values fall back to the default.
            batchSize: requestedBatchSize >= 1 ? Math.floor(requestedBatchSize) : 32,
        };
        // Register default providers
        this.registerProvider(new LocalNGramProvider());
        this.registerProvider(new MockEmbeddingProvider());
    }
    /**
     * Register an embedding provider under `provider.name`, replacing any
     * provider previously registered with that name.
     */
    registerProvider(provider) {
        this.providers.set(provider.name, provider);
    }
    /**
     * Get a registered provider.
     *
     * @param name - Provider name (uses the configured default if not specified)
     * @returns The provider registered under the resolved name
     * @throws Error when no provider is registered under that name
     */
    getProvider(name) {
        const providerName = name ?? this.config.defaultProvider;
        const provider = this.providers.get(providerName);
        if (!provider) {
            throw new Error(`Provider not found: ${providerName}`);
        }
        return provider;
    }
    /**
     * Generate embeddings for texts with caching.
     *
     * Texts with a fresh cache entry are served from the cache; the rest are
     * sent to the provider in batches of `batchSize` and cached afterwards.
     * Cached arrays are returned by reference, so callers should not mutate
     * the vectors they receive.
     *
     * @param texts - Texts to embed
     * @param provider - Provider name (uses default if not specified)
     * @returns Array of embeddings, one per input text in input order
     * @throws Error when the provider is unknown or returns a different
     *   number of embeddings than the texts it was given
     */
    async embed(texts, provider) {
        const providerInstance = this.getProvider(provider);
        const providerName = providerInstance.name;
        const now = Date.now();
        // Check cache and collect texts that need embedding
        const results = new Array(texts.length).fill(null);
        const pending = [];
        for (let i = 0; i < texts.length; i++) {
            const text = texts[i];
            const cacheKey = `${providerName}:${hashText(text)}`;
            const cached = this.cache.get(cacheKey);
            // The key only contains a 32-bit hash, so a hit is trusted only when
            // the entry was stored for this exact text; otherwise a hash collision
            // would hand back another text's embedding.
            if (cached && cached.text === text && now - cached.timestamp < this.config.cacheTtl) {
                results[i] = cached.embedding;
                cached.hits++;
            }
            else {
                pending.push({ index: i, text, cacheKey });
            }
        }
        // Generate embeddings for uncached texts in batches
        const { batchSize } = this.config;
        for (let start = 0; start < pending.length; start += batchSize) {
            const batch = pending.slice(start, start + batchSize);
            const embeddings = await providerInstance.embed(batch.map(item => item.text));
            if (!embeddings || embeddings.length !== batch.length) {
                // Without this check a short reply leaves null results and a long
                // one writes past the batch.
                throw new Error(`Provider ${providerName} returned ${embeddings ? embeddings.length : 0} embeddings for ${batch.length} texts`);
            }
            batch.forEach((item, j) => {
                results[item.index] = embeddings[j];
                // Cache the result
                this.addToCache(item.cacheKey, embeddings[j], now, item.text);
            });
        }
        return results;
    }
    /**
     * Generate a single embedding.
     *
     * @param text - Text to embed
     * @param provider - Provider name (uses default if not specified)
     * @returns The embedding of `text`
     */
    async embedOne(text, provider) {
        const results = await this.embed([text], provider);
        return results[0];
    }
    /**
     * Add an entry to the cache, evicting one entry first when the cache is
     * full. The evicted entry is the one with the fewest hits, ties broken by
     * oldest timestamp (this is not strict LRU).
     *
     * @param key - Cache key (provider name plus text hash)
     * @param embedding - Vector to store
     * @param timestamp - Creation time in milliseconds, used for TTL checks
     * @param text - Text the vector belongs to, used to detect key collisions
     */
    addToCache(key, embedding, timestamp, text) {
        const { maxCacheSize } = this.config;
        if (maxCacheSize <= 0) {
            // Caching is disabled; previously one entry was still kept.
            return;
        }
        // Overwriting an existing key does not grow the cache, so only a new
        // key can require an eviction.
        if (!this.cache.has(key) && this.cache.size >= maxCacheSize) {
            let victimKey;
            let oldestTime = Infinity;
            let lowestHits = Infinity;
            for (const [k, v] of this.cache.entries()) {
                if (v.hits < lowestHits || (v.hits === lowestHits && v.timestamp < oldestTime)) {
                    victimKey = k;
                    oldestTime = v.timestamp;
                    lowestHits = v.hits;
                }
            }
            if (victimKey !== undefined) {
                this.cache.delete(victimKey);
            }
        }
        this.cache.set(key, { embedding, timestamp, hits: 0, text });
    }
    /**
     * Compute cosine similarity between two embeddings.
     *
     * @returns Similarity in [-1, 1], or 0 when either vector has zero norm
     * @throws Error when the vectors differ in length
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new Error('Embeddings must have same dimensions');
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom === 0 ? 0 : dotProduct / denom;
    }
    /**
     * Find most similar texts from a corpus.
     *
     * @param query - Text to compare against the corpus
     * @param corpus - Candidate texts
     * @param k - Maximum number of matches to return (default 5); negative
     *   values are treated as 0
     * @param provider - Provider name (uses default if not specified)
     * @returns Up to `k` matches, highest similarity first; `index` is the
     *   position of the text in `corpus`
     */
    async findSimilar(query, corpus, k = 5, provider) {
        const [queryEmbed, ...corpusEmbeds] = await this.embed([query, ...corpus], provider);
        const results = corpusEmbeds.map((embed, i) => ({
            text: corpus[i],
            similarity: this.cosineSimilarity(queryEmbed, embed),
            index: i,
        }));
        return results
            .sort((a, b) => b.similarity - a.similarity)
            // A negative k would otherwise slice from the end of the list.
            .slice(0, Math.max(0, k));
    }
    /**
     * Get cache statistics.
     *
     * @returns `size` (entries currently cached), `maxSize` (configured limit)
     *   and `hitRate`, which is total hits divided by the number of cached
     *   entries (average hits per entry, not hits / lookups)
     */
    getCacheStats() {
        let totalHits = 0;
        for (const entry of this.cache.values()) {
            totalHits += entry.hits;
        }
        return {
            size: this.cache.size,
            maxSize: this.config.maxCacheSize,
            hitRate: this.cache.size > 0 ? totalHits / this.cache.size : 0,
        };
    }
    /**
     * Clear the cache. Registered providers are left untouched.
     */
    clearCache() {
        this.cache.clear();
    }
    /**
     * Get embedding dimensions for a provider.
     *
     * @param provider - Provider name (uses default if not specified)
     * @throws Error when the provider is unknown
     */
    getDimensions(provider) {
        return this.getProvider(provider).getDimensions();
    }
    /**
     * List available providers by the names they were registered under.
     */
    listProviders() {
        return Array.from(this.providers.keys());
    }
}
270
+ exports.EmbeddingService = EmbeddingService;
271
/**
 * Build a fresh, independent embedding service.
 *
 * @param config - Optional configuration passed straight to the
 *   EmbeddingService constructor
 * @returns A new EmbeddingService instance
 */
function createEmbeddingService(config) {
    const service = new EmbeddingService(config);
    return service;
}
277
// Shared instance handed out by getDefaultEmbeddingService(); null until first use.
let defaultService = null;
/**
 * Return the shared embedding service, creating it with default
 * configuration on the first call.
 *
 * @returns The same EmbeddingService instance on every call
 */
function getDefaultEmbeddingService() {
    defaultService = defaultService ?? new EmbeddingService();
    return defaultService;
}
288
+ exports.default = {
289
+ EmbeddingService,
290
+ LocalNGramProvider,
291
+ MockEmbeddingProvider,
292
+ createEmbeddingService,
293
+ getDefaultEmbeddingService,
294
+ };
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Services module exports
3
+ */
4
+ export * from './embedding-service';
5
+ export { default as embeddingService } from './embedding-service';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/services/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC"}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ /**
3
+ * Services module exports
4
+ */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Re-export helper: exposes m[k] on o under the name k2; an existing this.__createBinding is reused if present
6
+ if (k2 === undefined) k2 = k; // target name defaults to the source name
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no own descriptor, an accessor on a non-__esModule object, or a writable/configurable data property
9
+ desc = { enumerable: true, get: function() { return m[k]; } }; // forward through a getter that reads m[k] on every access
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) { // used when Object.create is falsy: copy the current value instead of defining a getter
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __exportStar = (this && this.__exportStar) || function(m, exports) { // Re-exports the enumerable keys of m on exports via __createBinding
17
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); // skips "default" and any name exports already owns
18
+ };
19
+ var __importDefault = (this && this.__importDefault) || function (mod) { // Makes sure the value has a "default" property to read
20
+ return (mod && mod.__esModule) ? mod : { "default": mod }; // __esModule objects pass through; anything else is wrapped as { default: mod }
21
+ };
22
+ Object.defineProperty(exports, "__esModule", { value: true }); // flag checked by __importDefault and __createBinding
23
+ exports.embeddingService = void 0; // placeholder; replaced by the getter defined on the last line
24
+ __exportStar(require("./embedding-service"), exports); // re-export every named export of ./embedding-service except "default"
25
+ var embedding_service_1 = require("./embedding-service");
26
+ Object.defineProperty(exports, "embeddingService", { enumerable: true, get: function () { return __importDefault(embedding_service_1).default; } }); // exposes the default export of ./embedding-service as the named export embeddingService
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.1.26",
3
+ "version": "0.1.27",
4
4
  "description": "High-performance vector database for Node.js with automatic native/WASM fallback",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -33,7 +33,12 @@
33
33
  "attention",
34
34
  "transformer",
35
35
  "flash-attention",
36
- "hyperbolic"
36
+ "hyperbolic",
37
+ "sona",
38
+ "lora",
39
+ "ewc",
40
+ "adaptive-learning",
41
+ "continual-learning"
37
42
  ],
38
43
  "author": "ruv.io Team <info@ruv.io> (https://ruv.io)",
39
44
  "homepage": "https://ruv.io",
@@ -54,7 +59,8 @@
54
59
  "ora": "^5.4.1"
55
60
  },
56
61
  "optionalDependencies": {
57
- "@ruvector/attention": "^0.1.1"
62
+ "@ruvector/attention": "^0.1.1",
63
+ "@ruvector/sona": "^0.1.3"
58
64
  },
59
65
  "devDependencies": {
60
66
  "@types/node": "^20.10.5",