@imayuur/contexthub-vector-engine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ import type { VectorSearchResult, MemoryEntry } from '@imayuur/contexthub-shared-types';
2
/**
 * Selects how `VectorEngine.generateEmbedding` produces vectors:
 * - `'local'`        — built-in hashed unigram/bigram term-frequency vector.
 * - `'off'`          — embedding generation is disabled; an empty array is returned.
 * - `'transformers'` — delegates to the lazily imported `@xenova/transformers` pipeline.
 */
export type EmbeddingMode =
    | 'local'
    | 'off'
    | 'transformers';
3
/**
 * File-backed embedding store with cosine-similarity search.
 *
 * Embeddings are kept in memory, keyed by memory id, and persisted to
 * `<repoPath>/.contexthub/embeddings/index.json`.
 */
export declare class VectorEngine {
    private embeddingsPath;
    private embeddings;
    private dimension;
    private mode;
    private static extractor;
    /**
     * Opens the store under `repoPath`, reading any existing index from disk.
     *
     * @param repoPath Directory that contains (or will contain) `.contexthub/embeddings`.
     * @param mode Embedding backend; defaults to `'local'` when omitted.
     */
    constructor(repoPath: string, mode?: EmbeddingMode);
    private loadEmbeddings;
    private saveEmbeddings;
    /**
     * Imports `@xenova/transformers` on first use and caches the resulting
     * feature-extraction pipeline on the class.
     */
    private getExtractor;
    /**
     * Produces an embedding for `text` with the backend chosen at construction.
     *
     * @returns An empty array in `'off'` mode; otherwise the generated vector.
     */
    generateEmbedding(text: string): Promise<number[]>;
    /**
     * Stores `embedding` under `id` and writes the index to disk.
     */
    addEmbedding(id: string, embedding: number[]): Promise<void>;
    /**
     * Generates an embedding from `content` and stores it under `id`.
     */
    addEmbeddingForContent(id: string, content: string): Promise<void>;
    /**
     * Ranks every stored embedding against `embedding` by cosine similarity.
     * Stored vectors whose length differs from the query score 0.
     *
     * @param limit Maximum number of results; defaults to 10.
     * @returns Results ordered from highest to lowest score.
     */
    searchSimilar(embedding: number[], limit?: number): Promise<VectorSearchResult[]>;
    /**
     * Embeds `query` and ranks the given `memories` against it, reusing a stored
     * embedding for each memory id when one exists and generating one from the
     * memory's content otherwise. Each result carries its memory as metadata.
     *
     * @param limit Maximum number of results; defaults to 10.
     */
    searchSimilarText(query: string, memories: MemoryEntry[], limit?: number): Promise<VectorSearchResult[]>;
    /**
     * Replaces the embedding stored under `id` (creating it if absent) and
     * writes the index to disk.
     */
    updateEmbedding(id: string, embedding: number[]): Promise<void>;
    /**
     * Removes the embedding stored under `id` and writes the index to disk.
     */
    deleteEmbedding(id: string): Promise<void>;
    /**
     * Cosine similarity of two vectors; 0 when lengths differ or either has zero magnitude.
     */
    private cosineSimilarity;
    /**
     * Generates and stores an embedding for each item in turn, then writes the
     * index to disk once at the end.
     */
    batchGenerateEmbeddings(items: Array<{
        id: string;
        content: string;
    }>): Promise<void>;
    /**
     * Number of embeddings currently held in memory.
     */
    getEmbeddingCount(): number;
    /**
     * Drops every embedding and writes the now-empty index to disk.
     */
    clearAll(): Promise<void>;
}
package/dist/index.js ADDED
@@ -0,0 +1,279 @@
1
+ "use strict";
2
// Re-exports property `key` of `source` on `target` (optionally under `alias`).
// Reuses a helper already present on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (function () {
    // Environments without Object.create get a plain value copy.
    if (!Object.create) {
        return function (target, source, key, alias) {
            if (alias === undefined) alias = key;
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source's own descriptor can be reused as-is or
        // must be replaced by a getter that reads through to the source.
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
13
// Attaches `original` as the `default` member of `namespace`.
// Reuses a helper already present on `this` when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    var viaDescriptor = function (namespace, original) {
        Object.defineProperty(namespace, "default", { enumerable: true, value: original });
    };
    var viaAssignment = function (namespace, original) {
        namespace["default"] = original;
    };
    return Object.create ? viaDescriptor : viaAssignment;
})();
18
// Wraps a CommonJS module in a namespace-like object: every own key except
// "default" is re-exported through __createBinding, and the module itself is
// attached as `default`. Modules flagged `__esModule` are returned untouched.
// Reuses a helper already present on `this` when one exists.
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use, then reused for every later call.
    var listOwnKeys = null;
    function resolveKeyLister() {
        return Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names.push(name);
                }
            }
            return names;
        };
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            if (listOwnKeys === null) {
                listOwnKeys = resolveKeyLister();
            }
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.VectorEngine = void 0;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
+ const crypto = __importStar(require("crypto"));
40
// ── Security Constants ────────────────────────────────────────────────────
// Upper bound on stored embeddings; saveEmbeddings() trims the store to this size.
const MAX_EMBEDDINGS = 10000;
// Largest index.json that loadEmbeddings() is willing to read.
const MAX_FILE_SIZE = 50 * 1024 * 1024; // 50MB
/**
 * File-backed embedding store with cosine-similarity search.
 *
 * Embeddings live in memory as an id → number[] map and are persisted to
 * `<repoPath>/.contexthub/embeddings/index.json`.
 */
class VectorEngine {
    /**
     * @param repoPath Directory that contains (or will contain) `.contexthub/embeddings`.
     * @param mode 'local' (default), 'off' or 'transformers'.
     */
    constructor(repoPath, mode = 'local') {
        this.mode = mode;
        this.dimension = mode === 'transformers' ? 384 : 1536;
        this.embeddingsPath = path.join(repoPath, '.contexthub', 'embeddings');
        this.embeddings = this.loadEmbeddings();
    }
    /**
     * Reads index.json into a fresh store.
     *
     * Never throws: a missing, oversized, unreadable or malformed file yields
     * an empty store. Entries that are not arrays of finite numbers are
     * dropped, because they would otherwise produce NaN scores or crash later
     * calls.
     *
     * @returns A prototype-less object mapping id → number[].
     */
    loadEmbeddings() {
        // No prototype, so ids such as "constructor" or "__proto__" behave like
        // ordinary keys instead of resolving to inherited members.
        const store = Object.create(null);
        try {
            const indexPath = path.join(this.embeddingsPath, 'index.json');
            if (!fs.existsSync(indexPath)) {
                return store;
            }
            // Security: Check file size before reading
            const stats = fs.statSync(indexPath);
            if (stats.size > MAX_FILE_SIZE) {
                console.error('Embeddings file too large, starting fresh');
                return store;
            }
            const parsed = JSON.parse(fs.readFileSync(indexPath, 'utf-8'));
            if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
                console.error('Embeddings file is malformed, starting fresh');
                return store;
            }
            let skipped = 0;
            for (const [id, vector] of Object.entries(parsed)) {
                if (Array.isArray(vector) && vector.every((value) => Number.isFinite(value))) {
                    store[id] = vector;
                }
                else {
                    skipped++;
                }
            }
            if (skipped > 0) {
                console.error(`Skipped ${skipped} malformed embedding entries`);
            }
        }
        catch (e) {
            console.error('Failed to load embeddings:', e?.message || 'unknown error');
        }
        return store;
    }
    /**
     * Trims the store to MAX_EMBEDDINGS entries and writes it to index.json.
     *
     * @throws Whatever the underlying fs write/rename throws; the temp file is
     *         removed (best effort) before rethrowing.
     */
    saveEmbeddings() {
        // Security: Cap embedding count
        const keys = Object.keys(this.embeddings);
        if (keys.length > MAX_EMBEDDINGS) {
            // Remove the first keys in enumeration order. Object.keys lists
            // integer-like keys first (ascending) and the rest in insertion
            // order, so this is "oldest first" only for non-numeric ids.
            const toRemove = keys.slice(0, keys.length - MAX_EMBEDDINGS);
            for (const key of toRemove) {
                delete this.embeddings[key];
            }
            console.error(`Embedding store capped at ${MAX_EMBEDDINGS} entries`);
        }
        if (!fs.existsSync(this.embeddingsPath)) {
            fs.mkdirSync(this.embeddingsPath, { recursive: true });
        }
        // Atomic write: write to tmp, then rename
        const indexPath = path.join(this.embeddingsPath, 'index.json');
        const tmpPath = indexPath + `.tmp.${crypto.randomBytes(4).toString('hex')}`;
        try {
            fs.writeFileSync(tmpPath, JSON.stringify(this.embeddings, null, 2), { mode: 0o600 });
            fs.renameSync(tmpPath, indexPath);
        }
        catch (e) {
            try {
                fs.unlinkSync(tmpPath);
            }
            catch { /* ignore */ }
            throw e;
        }
    }
    /**
     * Lazy load the transformers pipeline. The pipeline is cached on the class,
     * so it is shared by every VectorEngine instance.
     *
     * @throws Error when `@xenova/transformers` cannot be imported or the
     *         pipeline cannot be created.
     */
    async getExtractor() {
        if (!VectorEngine.extractor) {
            try {
                // Use dynamic import to avoid static dependency and bundle bloat.
                // NOTE(review): Function(...) is eval-like; the argument is a
                // constant string here, so no external input reaches it.
                const transformers = await Function('return import("@xenova/transformers")')();
                // Setup cache dir within .contexthub if we want, or rely on default
                VectorEngine.extractor = await transformers.pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
            }
            catch (e) {
                // Rejections are not guaranteed to be Error instances.
                const reason = e?.message ?? String(e);
                throw new Error('Failed to load @xenova/transformers. Install it with: npm install @xenova/transformers\n' + reason);
            }
        }
        return VectorEngine.extractor;
    }
    /**
     * Generate embedding from text using the selected mode.
     *
     * - 'off': returns [].
     * - 'transformers': mean-pooled, normalized output of the extractor.
     * - otherwise: a unit-length vector of `this.dimension` entries built by
     *   hashing unigrams and bigrams into three positions each.
     *
     * @param text Text to embed.
     * @returns The embedding vector.
     */
    async generateEmbedding(text) {
        if (this.mode === 'off') {
            return [];
        }
        if (this.mode === 'transformers') {
            const extractor = await this.getExtractor();
            const output = await extractor(text, { pooling: 'mean', normalize: true });
            return Array.from(output.data);
        }
        // Local mode: Hash + Bigram TF weighting
        // NOTE(review): \W is ASCII-only, so non-Latin text produces no tokens
        // and therefore an all-zero vector. Left unchanged because altering the
        // tokenizer would make previously stored vectors incomparable.
        const words = text.toLowerCase().split(/\W+/).filter(w => w.length > 2);
        const termFreqs = new Map();
        // Unigrams
        for (const word of words) {
            termFreqs.set(word, (termFreqs.get(word) || 0) + 1);
        }
        // Bigrams
        for (let i = 0; i < words.length - 1; i++) {
            const bigram = `${words[i]} ${words[i + 1]}`;
            termFreqs.set(bigram, (termFreqs.get(bigram) || 0) + 1);
        }
        const embedding = new Array(this.dimension).fill(0);
        for (const [term, freq] of termFreqs.entries()) {
            // 31-multiplier string hash, truncated to a 32-bit integer each step.
            let hash = 0;
            for (let i = 0; i < term.length; i++) {
                hash = ((hash << 5) - hash) + term.charCodeAt(i);
                hash = hash & hash;
            }
            const seed = Math.abs(hash);
            const isBigram = term.includes(' ');
            // Spread across 3 positions based on seed
            const pos1 = seed % this.dimension;
            const pos2 = (seed * 17) % this.dimension;
            const pos3 = (seed * 31) % this.dimension;
            // TF weighting: 1 + log10(tf)
            const tfWeight = 1 + Math.log10(freq);
            // Bigrams receive a slight multiplier to emphasize phrase matches
            const weight = tfWeight * (isBigram ? 1.5 : 1.0);
            embedding[pos1] += weight;
            embedding[pos2] += weight * 0.5;
            embedding[pos3] += weight * 0.25;
        }
        // Normalize
        const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
        if (magnitude > 0) {
            for (let i = 0; i < embedding.length; i++) {
                embedding[i] /= magnitude;
            }
        }
        return embedding;
    }
    /**
     * Add embedding for a memory entry and persist the store.
     *
     * NOTE(review): progress is reported with console.log (stdout) while
     * problems use console.error — confirm stdout is safe for every host.
     */
    async addEmbedding(id, embedding) {
        this.embeddings[id] = embedding;
        this.saveEmbeddings();
        console.log(`Added embedding for memory ${id} (${embedding.length} dimensions)`);
    }
    /**
     * Add embedding by generating it from content
     */
    async addEmbeddingForContent(id, content) {
        const embedding = await this.generateEmbedding(content);
        await this.addEmbedding(id, embedding);
    }
    /**
     * Search for similar embeddings using cosine similarity.
     *
     * @param embedding Query vector.
     * @param limit Maximum number of results (default 10).
     * @returns `{ id, score, metadata: null }` objects, best score first.
     */
    async searchSimilar(embedding, limit = 10) {
        const results = [];
        for (const [id, storedEmbedding] of Object.entries(this.embeddings)) {
            const score = this.cosineSimilarity(embedding, storedEmbedding);
            results.push({ id, score, metadata: null });
        }
        // Sort by score descending and limit
        results.sort((a, b) => b.score - a.score);
        return results.slice(0, limit);
    }
    /**
     * Search for similar text content.
     *
     * Embeds `query`, then scores each memory using its stored embedding or,
     * when none is stored and the memory has content, a freshly generated one.
     * Freshly generated non-empty embeddings are cached in the store, and the
     * store is written to disk only when at least one was added.
     *
     * @param query Text to search for.
     * @param memories Candidate entries; `id` and `content` are read.
     * @param limit Maximum number of results (default 10).
     * @returns `{ id, score, metadata: memory }` objects, best score first.
     */
    async searchSimilarText(query, memories, limit = 10) {
        const queryEmbedding = await this.generateEmbedding(query);
        const results = [];
        let storeChanged = false;
        for (const memory of memories) {
            let embedding = this.embeddings[memory.id];
            if (!embedding && memory.content) {
                embedding = await this.generateEmbedding(memory.content);
                // 'off' mode yields []; caching that would only fill the index
                // with useless empty vectors.
                if (embedding.length > 0) {
                    this.embeddings[memory.id] = embedding;
                    storeChanged = true;
                }
            }
            if (embedding) {
                const score = this.cosineSimilarity(queryEmbedding, embedding);
                results.push({ id: memory.id, score, metadata: memory });
            }
        }
        // A pure lookup must not rewrite index.json.
        if (storeChanged) {
            this.saveEmbeddings();
        }
        results.sort((a, b) => b.score - a.score);
        return results.slice(0, limit);
    }
    /**
     * Update an existing embedding (creates the entry if it does not exist)
     * and persist the store.
     */
    async updateEmbedding(id, embedding) {
        this.embeddings[id] = embedding;
        this.saveEmbeddings();
        console.log(`Updated embedding for memory ${id}`);
    }
    /**
     * Delete an embedding and persist the store.
     */
    async deleteEmbedding(id) {
        delete this.embeddings[id];
        this.saveEmbeddings();
        console.log(`Deleted embedding for memory ${id}`);
    }
    /**
     * Calculate cosine similarity between two vectors.
     *
     * @returns 0 when the lengths differ or either vector has zero magnitude.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length)
            return 0;
        let dotProduct = 0;
        let magnitudeA = 0;
        let magnitudeB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            magnitudeA += a[i] * a[i];
            magnitudeB += b[i] * b[i];
        }
        const magnitudeProduct = Math.sqrt(magnitudeA) * Math.sqrt(magnitudeB);
        if (magnitudeProduct === 0)
            return 0;
        return dotProduct / magnitudeProduct;
    }
    /**
     * Batch generate embeddings for multiple texts. Items are embedded one
     * after another and the store is written once at the end.
     *
     * @param items Objects with `id` and `content`.
     */
    async batchGenerateEmbeddings(items) {
        for (const item of items) {
            const embedding = await this.generateEmbedding(item.content);
            this.embeddings[item.id] = embedding;
        }
        this.saveEmbeddings();
        console.log(`Generated embeddings for ${items.length} items`);
    }
    /**
     * Get embedding count
     */
    getEmbeddingCount() {
        return Object.keys(this.embeddings).length;
    }
    /**
     * Clear all embeddings and persist the empty store.
     */
    async clearAll() {
        // Prototype-less for the same reason as in loadEmbeddings().
        this.embeddings = Object.create(null);
        this.saveEmbeddings();
        console.log('Cleared all embeddings');
    }
}
278
+ exports.VectorEngine = VectorEngine;
279
+ VectorEngine.extractor = null;
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "@imayuur/contexthub-vector-engine",
3
+ "version": "1.0.0",
4
+ "description": "Embeddings and semantic search for ContextHub",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/iMayuuR/contexthub.git",
9
+ "directory": "packages/vector-engine"
10
+ },
11
+ "publishConfig": {
12
+ "access": "public"
13
+ },
14
+ "engines": {
15
+ "node": ">=18"
16
+ },
17
+ "main": "dist/index.js",
18
+ "types": "dist/index.d.ts",
19
+ "files": [
20
+ "dist"
21
+ ],
22
+ "scripts": {
23
+ "build": "tsc",
24
+ "dev": "tsc --watch",
25
+ "prepublishOnly": "npm run build"
26
+ },
27
+ "dependencies": {
28
+ "@imayuur/contexthub-shared-types": "^1.0.0"
29
+ },
30
+ "devDependencies": {
31
+ "@types/node": "^18.0.0",
32
+ "typescript": "^5.0.0"
33
+ },
34
+ "author": "Mayur Dattatray Patil",
35
+ "bugs": {
36
+ "url": "https://github.com/iMayuuR/contexthub/issues"
37
+ },
38
+ "homepage": "https://github.com/iMayuuR/contexthub#readme",
39
+ "keywords": [
40
+ "contexthub",
41
+ "mcp",
42
+ "ai-memory",
43
+ "cursor",
44
+ "claude"
45
+ ],
46
+ "exports": {
47
+ ".": {
48
+ "types": "./dist/index.d.ts",
49
+ "default": "./dist/index.js"
50
+ }
51
+ }
52
+ }