ultra-dex 3.2.0 → 3.3.0

This diff reflects changes between publicly available package versions as they appear in their respective public registries and is provided for informational purposes only.
@@ -0,0 +1,477 @@
+ /**
+  * ultra-dex search command
+  * Semantic code search using vector embeddings
+  * This enables AI to UNDERSTAND your codebase, not just pattern match
+  */
+
+ import chalk from 'chalk';
+ import ora from 'ora';
+ import fs from 'fs/promises';
+ import path from 'path';
+ import { glob } from 'glob';
+ import { createProvider, getDefaultProvider, checkConfiguredProviders } from '../providers/index.js';
+
+ // ============================================================================
+ // VECTOR STORE CONFIGURATION
+ // ============================================================================
+
+ const EMBEDDINGS_CONFIG = {
+   // File patterns to index
+   includePatterns: [
+     '**/*.js', '**/*.ts', '**/*.tsx', '**/*.jsx',
+     '**/*.py', '**/*.go', '**/*.rs', '**/*.rb',
+     '**/*.md', '**/*.json', '**/*.yaml', '**/*.yml',
+     '**/*.prisma', '**/*.sql', '**/*.graphql',
+   ],
+
+   // Directories to exclude
+   excludeDirs: [
+     'node_modules', '.git', 'dist', 'build', '.next',
+     'coverage', '__pycache__', '.venv', 'vendor',
+     '.ultra-dex', '.ultra',
+   ],
+
+   // Chunk settings
+   chunkSize: 1000, // characters per chunk
+   chunkOverlap: 200,
+
+   // Index file location
+   indexPath: '.ultra-dex/embeddings.json',
+ };
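+ // Note: with chunkSize 1000 and chunkOverlap 200, chunkText() below advances in
+ // strides of 800 characters, so consecutive chunks share a 200-character window.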
+
+ // ============================================================================
+ // SIMPLE VECTOR STORE (In-Memory + File Persistence)
+ // ============================================================================
+
+ class VectorStore {
+   constructor() {
+     this.documents = []; // { id, path, content, embedding, chunk }
+     this.metadata = {
+       createdAt: null,
+       updatedAt: null,
+       fileCount: 0,
+       chunkCount: 0,
+     };
+   }
+
+   /**
+    * Cosine similarity between two vectors
+    */
+   cosineSimilarity(a, b) {
+     if (!a || !b || a.length !== b.length) return 0;
+
+     let dotProduct = 0;
+     let normA = 0;
+     let normB = 0;
+
+     for (let i = 0; i < a.length; i++) {
+       dotProduct += a[i] * b[i];
+       normA += a[i] * a[i];
+       normB += b[i] * b[i];
+     }
+
+     // Guard against zero-magnitude vectors so empty embeddings score 0 instead of NaN
+     const denominator = Math.sqrt(normA) * Math.sqrt(normB);
+     return denominator === 0 ? 0 : dotProduct / denominator;
+   }
+
+   /**
+    * Add document with embedding
+    */
+   addDocument(doc) {
+     this.documents.push(doc);
+     this.metadata.chunkCount = this.documents.length;
+     this.metadata.updatedAt = new Date().toISOString();
+   }
+
+   /**
+    * Search for similar documents
+    */
+   search(queryEmbedding, topK = 5) {
+     const results = this.documents.map(doc => ({
+       ...doc,
+       score: this.cosineSimilarity(queryEmbedding, doc.embedding),
+     }));
+
+     return results
+       .sort((a, b) => b.score - a.score)
+       .slice(0, topK);
+   }
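+   // Note: search() is a brute-force scan - every stored chunk is scored against
+   // the query on each call, which is fine for small repos but grows linearly
+   // with the number of indexed chunks.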
+
+   /**
+    * Save to disk
+    */
+   async save(filepath) {
+     await fs.mkdir(path.dirname(filepath), { recursive: true });
+     await fs.writeFile(filepath, JSON.stringify({
+       metadata: this.metadata,
+       documents: this.documents,
+     }, null, 2));
+   }
+
+   /**
+    * Load from disk
+    */
+   async load(filepath) {
+     try {
+       const data = JSON.parse(await fs.readFile(filepath, 'utf8'));
+       this.metadata = data.metadata;
+       this.documents = data.documents;
+       return true;
+     } catch {
+       return false;
+     }
+   }
+
+   /**
+    * Clear the store
+    */
+   clear() {
+     this.documents = [];
+     this.metadata = {
+       createdAt: null,
+       updatedAt: null,
+       fileCount: 0,
+       chunkCount: 0,
+     };
+   }
+ }
+
+ // Global store instance
+ const vectorStore = new VectorStore();
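+ // The module keeps this single shared store in memory; indexCodebase() and
+ // searchCodebase() below both read and write it, and persist it to
+ // EMBEDDINGS_CONFIG.indexPath (.ultra-dex/embeddings.json).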
+
+ // ============================================================================
+ // TEXT CHUNKING
+ // ============================================================================
+
+ /**
+  * Split text into overlapping chunks
+  */
+ function chunkText(text, filepath) {
+   const chunks = [];
+   const { chunkSize, chunkOverlap } = EMBEDDINGS_CONFIG;
+
+   // If text is small enough, use as single chunk
+   if (text.length <= chunkSize) {
+     return [{
+       content: text,
+       path: filepath,
+       chunk: 0,
+       total: 1,
+     }];
+   }
+
+   let start = 0;
+   let chunkIndex = 0;
+
+   while (start < text.length) {
+     const end = Math.min(start + chunkSize, text.length);
+     const chunk = text.slice(start, end);
+
+     chunks.push({
+       content: chunk,
+       path: filepath,
+       chunk: chunkIndex,
+       total: Math.ceil(text.length / (chunkSize - chunkOverlap)),
+     });
+
+     start += chunkSize - chunkOverlap;
+     chunkIndex++;
+   }
+
+   return chunks;
+ }
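+ // Worked example: a 2,500-character file with chunkSize 1000 / chunkOverlap 200
+ // produces chunks starting at 0, 800, 1600 and 2400 - four in total, matching
+ // total = Math.ceil(2500 / 800) = 4.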
+
+ // ============================================================================
+ // EMBEDDING GENERATION
+ // ============================================================================
+
+ /**
+  * Generate embeddings using AI provider
+  * Falls back to simple TF-IDF-like approach if no API key
+  */
+ async function generateEmbedding(text, provider = null) {
+   // If we have an OpenAI-compatible provider, use their embeddings API
+   if (provider && provider.getEmbedding) {
+     try {
+       return await provider.getEmbedding(text);
+     } catch (err) {
+       console.log(chalk.yellow(`Embedding API failed, using local fallback: ${err.message}`));
+     }
+   }
+
+   // Fallback: Simple bag-of-words embedding
+   // This is a basic implementation - real embeddings would be much better
+   return generateLocalEmbedding(text);
+ }
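+ // Note: query and document embeddings must come from the same source. If one side
+ // is embedded by a provider and the other by the 384-dimension local fallback,
+ // the vector lengths will typically differ and cosineSimilarity() then returns 0
+ // for every result.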
+
+ /**
+  * Simple local embedding using TF-IDF-like approach
+  * This is a fallback when no API is available
+  */
+ function generateLocalEmbedding(text, dimensions = 384) {
+   const words = text.toLowerCase()
+     .replace(/[^a-z0-9\s]/g, ' ')
+     .split(/\s+/)
+     .filter(w => w.length > 2);
+
+   // Create a simple hash-based embedding
+   const embedding = new Array(dimensions).fill(0);
+
+   for (const word of words) {
+     // Hash the word to get consistent positions
+     let hash = 0;
+     for (let i = 0; i < word.length; i++) {
+       hash = ((hash << 5) - hash) + word.charCodeAt(i);
+       hash = hash & hash; // Convert to 32bit integer
+     }
+
+     // Use hash to update multiple positions
+     for (let i = 0; i < 3; i++) {
+       const pos = Math.abs((hash + i * 127) % dimensions);
+       embedding[pos] += 1;
+     }
+   }
+
+   // Normalize
+   const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
+   if (magnitude > 0) {
+     for (let i = 0; i < dimensions; i++) {
+       embedding[i] /= magnitude;
+     }
+   }
+
+   return embedding;
+ }
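+ // In effect this is the "hashing trick": each word deterministically bumps three
+ // pseudo-random positions of a 384-dimension vector, and after L2 normalization
+ // cosine similarity becomes a rough measure of shared vocabulary rather than of
+ // semantic meaning.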
+
+ // ============================================================================
+ // INDEXING
+ // ============================================================================
+
+ /**
+  * Index the codebase
+  */
+ export async function indexCodebase(workdir = process.cwd(), options = {}) {
+   const { force = false, verbose = false } = options;
+   const indexPath = path.join(workdir, EMBEDDINGS_CONFIG.indexPath);
+
+   // Check if index exists and is recent
+   if (!force) {
+     const loaded = await vectorStore.load(indexPath);
+     if (loaded) {
+       const indexAge = Date.now() - new Date(vectorStore.metadata.updatedAt).getTime();
+       const oneHour = 60 * 60 * 1000;
+
+       if (indexAge < oneHour) {
+         return {
+           cached: true,
+           files: vectorStore.metadata.fileCount,
+           chunks: vectorStore.metadata.chunkCount,
+         };
+       }
+     }
+   }
+
+   // Clear and rebuild
+   vectorStore.clear();
+   vectorStore.metadata.createdAt = new Date().toISOString();
+
+   // Find all files to index
+   const excludePattern = EMBEDDINGS_CONFIG.excludeDirs.map(d => `**/${d}/**`);
+   const files = [];
+
+   for (const pattern of EMBEDDINGS_CONFIG.includePatterns) {
+     const matches = await glob(pattern, {
+       cwd: workdir,
+       ignore: excludePattern,
+       nodir: true,
+     });
+     files.push(...matches);
+   }
+
+   // Deduplicate
+   const uniqueFiles = [...new Set(files)];
+   vectorStore.metadata.fileCount = uniqueFiles.length;
+
+   if (verbose) {
+     console.log(chalk.gray(`Found ${uniqueFiles.length} files to index`));
+   }
+
+   // Index each file
+   let chunkCount = 0;
+   for (const file of uniqueFiles) {
+     try {
+       const filepath = path.join(workdir, file);
+       const content = await fs.readFile(filepath, 'utf8');
+
+       // Skip very large files
+       if (content.length > 100000) {
+         if (verbose) console.log(chalk.gray(`Skipping large file: ${file}`));
+         continue;
+       }
+
+       // Chunk the content
+       const chunks = chunkText(content, file);
+
+       // Generate embeddings for each chunk
+       for (const chunk of chunks) {
+         const embedding = await generateEmbedding(chunk.content);
+
+         vectorStore.addDocument({
+           id: `${file}:${chunk.chunk}`,
+           path: chunk.path,
+           content: chunk.content,
+           embedding,
+           chunk: chunk.chunk,
+           total: chunk.total,
+         });
+
+         chunkCount++;
+       }
+     } catch (err) {
+       if (verbose) {
+         console.log(chalk.yellow(`Failed to index ${file}: ${err.message}`));
+       }
+     }
+   }
+
+   // Save index
+   await vectorStore.save(indexPath);
+
+   return {
+     cached: false,
+     files: uniqueFiles.length,
+     chunks: chunkCount,
+   };
+ }
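+ // Illustrative programmatic call (outside the CLI):
+ //   const { cached, files, chunks } = await indexCodebase(process.cwd(), { force: true });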
+
+ /**
+  * Search the indexed codebase
+  */
+ export async function searchCodebase(query, options = {}) {
+   const { workdir = process.cwd(), topK = 10 } = options;
+   const indexPath = path.join(workdir, EMBEDDINGS_CONFIG.indexPath);
+
+   // Ensure index is loaded
+   if (vectorStore.documents.length === 0) {
+     const loaded = await vectorStore.load(indexPath);
+     if (!loaded) {
+       throw new Error('No index found. Run `ultra-dex search --index` first.');
+     }
+   }
+
+   // Generate query embedding
+   const queryEmbedding = await generateEmbedding(query);
+
+   // Search
+   const results = vectorStore.search(queryEmbedding, topK);
+
+   return results.map(r => ({
+     path: r.path,
+     chunk: r.chunk,
+     score: r.score,
+     preview: r.content.slice(0, 200) + (r.content.length > 200 ? '...' : ''),
+   }));
+ }
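+ // Illustrative programmatic call:
+ //   const hits = await searchCodebase('where is auth handled', { topK: 5 });
+ //   // -> [{ path, chunk, score, preview }, ...]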
+
+ // ============================================================================
+ // CLI COMMAND
+ // ============================================================================
+
+ export function registerSearchCommand(program) {
+   program
+     .command('search [query]')
+     .description('Semantic search across your codebase using embeddings')
+     .option('--index', 'Rebuild the search index')
+     .option('--force', 'Force full re-index')
+     .option('-k, --top <n>', 'Number of results', '10')
+     .option('-v, --verbose', 'Show detailed output')
+     .option('--stats', 'Show index statistics')
+     .action(async (query, options) => {
+       console.log(chalk.cyan('\nšŸ” Ultra-Dex Semantic Search\n'));
+
+       const workdir = process.cwd();
+       const indexPath = path.join(workdir, EMBEDDINGS_CONFIG.indexPath);
+
+       if (options.stats) {
+         // Show index stats
+         const loaded = await vectorStore.load(indexPath);
+         if (!loaded) {
+           console.log(chalk.yellow('No index found.'));
+           return;
+         }
+
+         console.log(chalk.bold('Index Statistics:'));
+         console.log(` Created: ${vectorStore.metadata.createdAt}`);
+         console.log(` Updated: ${vectorStore.metadata.updatedAt}`);
+         console.log(` Files: ${vectorStore.metadata.fileCount}`);
+         console.log(` Chunks: ${vectorStore.metadata.chunkCount}`);
+         return;
+       }
+
+       if (options.index || !query) {
+         // Build/rebuild index
+         const spinner = ora('Indexing codebase...').start();
+
+         try {
+           const result = await indexCodebase(workdir, {
+             force: options.force,
+             verbose: options.verbose,
+           });
+
+           if (result.cached) {
+             spinner.succeed(`Using cached index (${result.files} files, ${result.chunks} chunks)`);
+           } else {
+             spinner.succeed(`Indexed ${result.files} files into ${result.chunks} chunks`);
+           }
+         } catch (err) {
+           spinner.fail(`Indexing failed: ${err.message}`);
+           return;
+         }
+
+         if (!query) {
+           console.log(chalk.gray('\nIndex ready. Use `ultra-dex search "your query"` to search.'));
+           return;
+         }
+       }
+
+       // Search
+       const spinner = ora(`Searching for: "${query}"`).start();
+
+       try {
+         const results = await searchCodebase(query, {
+           workdir,
+           topK: parseInt(options.top, 10),
+         });
+
+         spinner.succeed(`Found ${results.length} results\n`);
+
+         if (results.length === 0) {
+           console.log(chalk.yellow('No matches found. Try different keywords or rebuild the index.'));
+           return;
+         }
+
+         // Display results
+         for (let i = 0; i < results.length; i++) {
+           const r = results[i];
+           const scoreColor = r.score > 0.7 ? chalk.green : r.score > 0.4 ? chalk.yellow : chalk.gray;
+
+           console.log(chalk.bold(`${i + 1}. ${r.path}`) + chalk.gray(` (chunk ${r.chunk})`));
+           console.log(` Score: ${scoreColor(r.score.toFixed(3))}`);
+           console.log(chalk.gray(` ${r.preview.replace(/\n/g, ' ')}`));
+           console.log();
+         }
+
+         // Hint
+         console.log(chalk.gray('Tip: Use --index --force to rebuild embeddings if results seem stale.'));
+
+       } catch (err) {
+         spinner.fail(`Search failed: ${err.message}`);
+       }
+     });
+ }
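+ // Wiring sketch (assumes a Commander-style `program` exposing .command/.option/.action,
+ // which is what the chained calls above rely on):
+ //   registerSearchCommand(program);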
+
+ export default {
+   registerSearchCommand,
+   indexCodebase,
+   searchCodebase,
+   vectorStore,
+ };