agentic-flow 2.0.1-alpha.17 → 2.0.1-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/intelligence/EmbeddingCache.d.ts +105 -0
- package/dist/intelligence/EmbeddingCache.d.ts.map +1 -0
- package/dist/intelligence/EmbeddingCache.js +253 -0
- package/dist/intelligence/EmbeddingCache.js.map +1 -0
- package/dist/intelligence/EmbeddingService.d.ts +213 -1
- package/dist/intelligence/EmbeddingService.d.ts.map +1 -1
- package/dist/intelligence/EmbeddingService.js +965 -7
- package/dist/intelligence/EmbeddingService.js.map +1 -1
- package/package.json +1 -1
|
@@ -5,11 +5,15 @@
|
|
|
5
5
|
* - SIMD128 acceleration (6x faster)
|
|
6
6
|
* - Parallel worker threads (7 workers)
|
|
7
7
|
* - all-MiniLM-L6-v2 model (384 dimensions)
|
|
8
|
+
* - Persistent SQLite cache (0.1ms vs 400ms)
|
|
8
9
|
*
|
|
9
10
|
* Configure via:
|
|
10
11
|
* - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
|
|
11
12
|
* - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
|
|
13
|
+
* - AGENTIC_FLOW_EMBEDDING_CACHE=true|false (default: true)
|
|
14
|
+
* - AGENTIC_FLOW_PERSISTENT_CACHE=true|false (default: true)
|
|
12
15
|
*/
|
|
16
|
+
import { getEmbeddingCache } from './EmbeddingCache.js';
|
|
13
17
|
// ONNX availability cache
|
|
14
18
|
let onnxAvailable = null;
|
|
15
19
|
let ruvectorModule = null;
|
|
@@ -32,8 +36,8 @@ async function detectOnnx() {
|
|
|
32
36
|
return false;
|
|
33
37
|
}
|
|
34
38
|
}
|
|
35
|
-
// Simple LRU cache for embeddings
|
|
36
|
-
class
|
|
39
|
+
// Simple LRU cache for embeddings (in-memory, fast)
|
|
40
|
+
class LRUCache {
|
|
37
41
|
cache = new Map();
|
|
38
42
|
maxSize;
|
|
39
43
|
constructor(maxSize = 1000) {
|
|
@@ -78,9 +82,12 @@ export class EmbeddingService {
|
|
|
78
82
|
totalEmbeddings = 0;
|
|
79
83
|
totalLatencyMs = 0;
|
|
80
84
|
cacheHits = 0;
|
|
81
|
-
// Cache
|
|
85
|
+
// Cache (in-memory LRU)
|
|
82
86
|
cache;
|
|
83
87
|
cacheEnabled;
|
|
88
|
+
// Persistent cache (SQLite)
|
|
89
|
+
persistentCache = null;
|
|
90
|
+
persistentCacheEnabled;
|
|
84
91
|
// Corpus for search operations
|
|
85
92
|
corpus = { texts: [], embeddings: [] };
|
|
86
93
|
constructor() {
|
|
@@ -89,7 +96,18 @@ export class EmbeddingService {
|
|
|
89
96
|
this.modelName = process.env.AGENTIC_FLOW_EMBEDDING_MODEL || 'all-MiniLM-L6-v2';
|
|
90
97
|
this.dimension = 256; // Will be updated when ONNX loads (384)
|
|
91
98
|
this.cacheEnabled = process.env.AGENTIC_FLOW_EMBEDDING_CACHE !== 'false';
|
|
92
|
-
this.
|
|
99
|
+
this.persistentCacheEnabled = process.env.AGENTIC_FLOW_PERSISTENT_CACHE !== 'false';
|
|
100
|
+
this.cache = new LRUCache(1000);
|
|
101
|
+
// Initialize persistent cache
|
|
102
|
+
if (this.persistentCacheEnabled) {
|
|
103
|
+
try {
|
|
104
|
+
this.persistentCache = getEmbeddingCache({ dimension: 384 });
|
|
105
|
+
}
|
|
106
|
+
catch (error) {
|
|
107
|
+
console.warn('[EmbeddingService] Persistent cache unavailable:', error);
|
|
108
|
+
this.persistentCacheEnabled = false;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
93
111
|
}
|
|
94
112
|
static getInstance() {
|
|
95
113
|
if (!EmbeddingService.instance) {
|
|
@@ -150,7 +168,7 @@ export class EmbeddingService {
|
|
|
150
168
|
*/
|
|
151
169
|
async embed(text) {
|
|
152
170
|
const startTime = performance.now();
|
|
153
|
-
// Check cache
|
|
171
|
+
// Check in-memory cache first (fastest)
|
|
154
172
|
if (this.cacheEnabled) {
|
|
155
173
|
const cached = this.cache.get(text);
|
|
156
174
|
if (cached) {
|
|
@@ -158,6 +176,18 @@ export class EmbeddingService {
|
|
|
158
176
|
return cached;
|
|
159
177
|
}
|
|
160
178
|
}
|
|
179
|
+
// Check persistent cache (SQLite, ~0.1ms)
|
|
180
|
+
if (this.persistentCache) {
|
|
181
|
+
const cached = this.persistentCache.get(text, this.modelName);
|
|
182
|
+
if (cached) {
|
|
183
|
+
this.cacheHits++;
|
|
184
|
+
// Also store in memory cache for faster subsequent access
|
|
185
|
+
if (this.cacheEnabled) {
|
|
186
|
+
this.cache.set(text, cached);
|
|
187
|
+
}
|
|
188
|
+
return cached;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
161
191
|
// Resolve backend (handles 'auto' mode)
|
|
162
192
|
const effectiveBackend = await this.resolveBackend();
|
|
163
193
|
let embedding;
|
|
@@ -177,10 +207,14 @@ export class EmbeddingService {
|
|
|
177
207
|
// Update stats
|
|
178
208
|
this.totalEmbeddings++;
|
|
179
209
|
this.totalLatencyMs += performance.now() - startTime;
|
|
180
|
-
// Cache result
|
|
210
|
+
// Cache result in memory
|
|
181
211
|
if (this.cacheEnabled) {
|
|
182
212
|
this.cache.set(text, embedding);
|
|
183
213
|
}
|
|
214
|
+
// Cache result persistently (for cross-session)
|
|
215
|
+
if (this.persistentCache && effectiveBackend === 'onnx') {
|
|
216
|
+
this.persistentCache.set(text, embedding, this.modelName);
|
|
217
|
+
}
|
|
184
218
|
return embedding;
|
|
185
219
|
}
|
|
186
220
|
/**
|
|
@@ -448,6 +482,19 @@ export class EmbeddingService {
|
|
|
448
482
|
getStats() {
|
|
449
483
|
const effective = this.effectiveBackend || this.backend;
|
|
450
484
|
const ruvectorStats = ruvectorModule?.getStats?.() || {};
|
|
485
|
+
// Get persistent cache stats
|
|
486
|
+
let persistentCacheStats;
|
|
487
|
+
if (this.persistentCache) {
|
|
488
|
+
const cacheStats = this.persistentCache.getStats();
|
|
489
|
+
persistentCacheStats = {
|
|
490
|
+
enabled: true,
|
|
491
|
+
entries: cacheStats.totalEntries,
|
|
492
|
+
hits: cacheStats.hits,
|
|
493
|
+
misses: cacheStats.misses,
|
|
494
|
+
hitRate: cacheStats.hitRate,
|
|
495
|
+
dbSizeKB: Math.round(cacheStats.dbSizeBytes / 1024),
|
|
496
|
+
};
|
|
497
|
+
}
|
|
451
498
|
return {
|
|
452
499
|
backend: this.backend,
|
|
453
500
|
effectiveBackend: effective,
|
|
@@ -460,14 +507,46 @@ export class EmbeddingService {
|
|
|
460
507
|
modelName: effective === 'onnx' ? this.modelName : undefined,
|
|
461
508
|
simdAvailable: ruvectorStats.simdAvailable ?? onnxAvailable,
|
|
462
509
|
parallelWorkers: ruvectorStats.workerCount ?? undefined,
|
|
510
|
+
persistentCache: persistentCacheStats,
|
|
463
511
|
};
|
|
464
512
|
}
|
|
465
513
|
/**
|
|
466
|
-
* Clear cache
|
|
514
|
+
* Clear in-memory cache
|
|
467
515
|
*/
|
|
468
516
|
clearCache() {
|
|
469
517
|
this.cache.clear();
|
|
470
518
|
}
|
|
519
|
+
/**
|
|
520
|
+
* Clear persistent cache (SQLite)
|
|
521
|
+
*/
|
|
522
|
+
clearPersistentCache() {
|
|
523
|
+
if (this.persistentCache) {
|
|
524
|
+
this.persistentCache.clear();
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
/**
|
|
528
|
+
* Clear all caches (memory + persistent)
|
|
529
|
+
*/
|
|
530
|
+
clearAllCaches() {
|
|
531
|
+
this.cache.clear();
|
|
532
|
+
if (this.persistentCache) {
|
|
533
|
+
this.persistentCache.clear();
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
/**
|
|
537
|
+
* Get persistent cache stats
|
|
538
|
+
*/
|
|
539
|
+
getPersistentCacheStats() {
|
|
540
|
+
if (!this.persistentCache)
|
|
541
|
+
return null;
|
|
542
|
+
const stats = this.persistentCache.getStats();
|
|
543
|
+
return {
|
|
544
|
+
entries: stats.totalEntries,
|
|
545
|
+
hits: stats.hits,
|
|
546
|
+
misses: stats.misses,
|
|
547
|
+
hitRate: stats.hitRate,
|
|
548
|
+
};
|
|
549
|
+
}
|
|
471
550
|
/**
|
|
472
551
|
* Clear corpus
|
|
473
552
|
*/
|
|
@@ -493,6 +572,879 @@ export class EmbeddingService {
|
|
|
493
572
|
onnxAvailable = null;
|
|
494
573
|
ruvectorModule = null;
|
|
495
574
|
}
|
|
575
|
+
/**
|
|
576
|
+
* Pretrain cache with texts from files
|
|
577
|
+
* Embeds content and stores in persistent cache for fast retrieval
|
|
578
|
+
*
|
|
579
|
+
* @param sources - File paths or glob patterns, or array of texts
|
|
580
|
+
* @param options - Pretrain options
|
|
581
|
+
* @returns Stats about pretraining
|
|
582
|
+
*/
|
|
583
|
+
async pretrain(sources, options = {}) {
|
|
584
|
+
const { batchSize = 32, onProgress, chunkSize = 512, overlapSize = 64, skipCached = true } = options;
|
|
585
|
+
const startTime = performance.now();
|
|
586
|
+
let processed = 0;
|
|
587
|
+
let cached = 0;
|
|
588
|
+
let skipped = 0;
|
|
589
|
+
// Resolve texts to embed
|
|
590
|
+
const texts = [];
|
|
591
|
+
if (typeof sources === 'string') {
|
|
592
|
+
sources = [sources];
|
|
593
|
+
}
|
|
594
|
+
for (const source of sources) {
|
|
595
|
+
// Check if it's a file path or glob pattern
|
|
596
|
+
if (source.includes('/') || source.includes('*') || source.includes('.')) {
|
|
597
|
+
try {
|
|
598
|
+
const fs = await import('fs');
|
|
599
|
+
const path = await import('path');
|
|
600
|
+
const { glob } = await import('glob').catch(() => ({ glob: null }));
|
|
601
|
+
// Handle glob patterns
|
|
602
|
+
let files = [];
|
|
603
|
+
if (source.includes('*') && glob) {
|
|
604
|
+
files = await glob(source);
|
|
605
|
+
}
|
|
606
|
+
else if (fs.existsSync(source)) {
|
|
607
|
+
files = [source];
|
|
608
|
+
}
|
|
609
|
+
for (const file of files) {
|
|
610
|
+
try {
|
|
611
|
+
const content = fs.readFileSync(file, 'utf-8');
|
|
612
|
+
// Chunk large files
|
|
613
|
+
if (content.length > chunkSize * 2) {
|
|
614
|
+
for (let i = 0; i < content.length; i += chunkSize - overlapSize) {
|
|
615
|
+
const chunk = content.slice(i, i + chunkSize);
|
|
616
|
+
if (chunk.trim().length > 10) {
|
|
617
|
+
texts.push(chunk);
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
else if (content.trim().length > 10) {
|
|
622
|
+
texts.push(content);
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
catch {
|
|
626
|
+
// Skip unreadable files
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
catch {
|
|
631
|
+
// Treat as plain text if file operations fail
|
|
632
|
+
texts.push(source);
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
else {
|
|
636
|
+
texts.push(source);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
// Filter out already cached texts
|
|
640
|
+
const toEmbed = [];
|
|
641
|
+
for (const text of texts) {
|
|
642
|
+
if (skipCached && this.persistentCache?.has(text, this.modelName)) {
|
|
643
|
+
skipped++;
|
|
644
|
+
}
|
|
645
|
+
else {
|
|
646
|
+
toEmbed.push(text);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
// Embed in batches
|
|
650
|
+
for (let i = 0; i < toEmbed.length; i += batchSize) {
|
|
651
|
+
const batch = toEmbed.slice(i, i + batchSize);
|
|
652
|
+
const embeddings = await this.embedBatch(batch);
|
|
653
|
+
// Store in persistent cache (embedBatch already handles this for ONNX)
|
|
654
|
+
cached += embeddings.length;
|
|
655
|
+
processed += batch.length;
|
|
656
|
+
if (onProgress) {
|
|
657
|
+
onProgress(processed, toEmbed.length);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
return {
|
|
661
|
+
processed,
|
|
662
|
+
cached,
|
|
663
|
+
skipped,
|
|
664
|
+
timeMs: performance.now() - startTime,
|
|
665
|
+
};
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* Pretrain with common programming patterns
|
|
669
|
+
* Pre-caches embeddings for frequently used code patterns
|
|
670
|
+
*/
|
|
671
|
+
async pretrainCodePatterns() {
|
|
672
|
+
const patterns = [
|
|
673
|
+
// Common programming constructs
|
|
674
|
+
'function implementation',
|
|
675
|
+
'class definition',
|
|
676
|
+
'interface declaration',
|
|
677
|
+
'type alias',
|
|
678
|
+
'import statement',
|
|
679
|
+
'export module',
|
|
680
|
+
'async await pattern',
|
|
681
|
+
'promise handling',
|
|
682
|
+
'error handling try catch',
|
|
683
|
+
'conditional logic if else',
|
|
684
|
+
'loop iteration for while',
|
|
685
|
+
'array map filter reduce',
|
|
686
|
+
'object destructuring',
|
|
687
|
+
'spread operator',
|
|
688
|
+
'rest parameters',
|
|
689
|
+
// Code operations
|
|
690
|
+
'refactor code',
|
|
691
|
+
'fix bug',
|
|
692
|
+
'add feature',
|
|
693
|
+
'write tests',
|
|
694
|
+
'add documentation',
|
|
695
|
+
'optimize performance',
|
|
696
|
+
'improve readability',
|
|
697
|
+
'handle edge cases',
|
|
698
|
+
'add validation',
|
|
699
|
+
'implement authentication',
|
|
700
|
+
// File types
|
|
701
|
+
'TypeScript file',
|
|
702
|
+
'JavaScript module',
|
|
703
|
+
'React component',
|
|
704
|
+
'Vue component',
|
|
705
|
+
'CSS stylesheet',
|
|
706
|
+
'JSON configuration',
|
|
707
|
+
'Markdown documentation',
|
|
708
|
+
'Python script',
|
|
709
|
+
'Shell script',
|
|
710
|
+
'SQL query',
|
|
711
|
+
// Agent routing patterns
|
|
712
|
+
'code review task',
|
|
713
|
+
'architecture design',
|
|
714
|
+
'testing strategy',
|
|
715
|
+
'debugging session',
|
|
716
|
+
'performance analysis',
|
|
717
|
+
'security audit',
|
|
718
|
+
'documentation update',
|
|
719
|
+
'API design',
|
|
720
|
+
'database schema',
|
|
721
|
+
'deployment configuration',
|
|
722
|
+
];
|
|
723
|
+
const startTime = performance.now();
|
|
724
|
+
const embeddings = await this.embedBatch(patterns);
|
|
725
|
+
return {
|
|
726
|
+
cached: embeddings.length,
|
|
727
|
+
timeMs: performance.now() - startTime,
|
|
728
|
+
};
|
|
729
|
+
}
|
|
730
|
+
/**
|
|
731
|
+
* Pretrain from repository structure
|
|
732
|
+
* Analyzes file names and paths to pre-cache common patterns
|
|
733
|
+
*/
|
|
734
|
+
async pretrainFromRepo(repoPath = '.') {
|
|
735
|
+
const startTime = performance.now();
|
|
736
|
+
let files = 0;
|
|
737
|
+
let chunks = 0;
|
|
738
|
+
try {
|
|
739
|
+
const fs = await import('fs');
|
|
740
|
+
const path = await import('path');
|
|
741
|
+
// Common code file extensions
|
|
742
|
+
const extensions = ['.ts', '.tsx', '.js', '.jsx', '.py', '.md', '.json'];
|
|
743
|
+
const walkDir = (dir) => {
|
|
744
|
+
try {
|
|
745
|
+
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
746
|
+
for (const entry of entries) {
|
|
747
|
+
const fullPath = path.join(dir, entry.name);
|
|
748
|
+
if (entry.isDirectory()) {
|
|
749
|
+
// Skip node_modules, .git, etc.
|
|
750
|
+
if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
|
|
751
|
+
walkDir(fullPath);
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
else if (extensions.some(ext => entry.name.endsWith(ext))) {
|
|
755
|
+
return fullPath;
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
catch {
|
|
760
|
+
// Skip unreadable directories
|
|
761
|
+
}
|
|
762
|
+
return null;
|
|
763
|
+
};
|
|
764
|
+
// Collect files
|
|
765
|
+
const filePaths = [];
|
|
766
|
+
const collectFiles = (dir) => {
|
|
767
|
+
try {
|
|
768
|
+
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
769
|
+
for (const entry of entries) {
|
|
770
|
+
const fullPath = path.join(dir, entry.name);
|
|
771
|
+
if (entry.isDirectory()) {
|
|
772
|
+
if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
|
|
773
|
+
collectFiles(fullPath);
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
else if (extensions.some(ext => entry.name.endsWith(ext))) {
|
|
777
|
+
filePaths.push(fullPath);
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
catch {
|
|
782
|
+
// Skip unreadable
|
|
783
|
+
}
|
|
784
|
+
};
|
|
785
|
+
collectFiles(repoPath);
|
|
786
|
+
files = filePaths.length;
|
|
787
|
+
// Pretrain from collected files
|
|
788
|
+
if (filePaths.length > 0) {
|
|
789
|
+
const result = await this.pretrain(filePaths, {
|
|
790
|
+
batchSize: 16,
|
|
791
|
+
chunkSize: 512,
|
|
792
|
+
overlapSize: 64,
|
|
793
|
+
});
|
|
794
|
+
chunks = result.cached;
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
catch (err) {
|
|
798
|
+
// Repository analysis failed
|
|
799
|
+
}
|
|
800
|
+
return {
|
|
801
|
+
files,
|
|
802
|
+
chunks,
|
|
803
|
+
timeMs: performance.now() - startTime,
|
|
804
|
+
};
|
|
805
|
+
}
|
|
806
|
+
/**
|
|
807
|
+
* Incremental pretrain - only process changed files since last run
|
|
808
|
+
* Uses git diff to detect modified files
|
|
809
|
+
*/
|
|
810
|
+
async pretrainIncremental(options = {}) {
|
|
811
|
+
const { since = 'HEAD~10', repoPath = '.' } = options;
|
|
812
|
+
const startTime = performance.now();
|
|
813
|
+
let changedFiles = 0;
|
|
814
|
+
let newChunks = 0;
|
|
815
|
+
let skipped = 0;
|
|
816
|
+
try {
|
|
817
|
+
const { execSync } = await import('child_process');
|
|
818
|
+
const path = await import('path');
|
|
819
|
+
const fs = await import('fs');
|
|
820
|
+
// Get changed files from git
|
|
821
|
+
const gitOutput = execSync(`git diff --name-only ${since}`, {
|
|
822
|
+
cwd: repoPath,
|
|
823
|
+
encoding: 'utf-8',
|
|
824
|
+
});
|
|
825
|
+
const changedPaths = gitOutput
|
|
826
|
+
.split('\n')
|
|
827
|
+
.filter(f => f.trim())
|
|
828
|
+
.map(f => path.join(repoPath, f))
|
|
829
|
+
.filter(f => {
|
|
830
|
+
try {
|
|
831
|
+
return fs.existsSync(f) && fs.statSync(f).isFile();
|
|
832
|
+
}
|
|
833
|
+
catch {
|
|
834
|
+
return false;
|
|
835
|
+
}
|
|
836
|
+
});
|
|
837
|
+
changedFiles = changedPaths.length;
|
|
838
|
+
if (changedPaths.length > 0) {
|
|
839
|
+
const result = await this.pretrain(changedPaths, {
|
|
840
|
+
batchSize: 16,
|
|
841
|
+
chunkSize: 512,
|
|
842
|
+
overlapSize: 64,
|
|
843
|
+
skipCached: true,
|
|
844
|
+
});
|
|
845
|
+
newChunks = result.cached;
|
|
846
|
+
skipped = result.skipped;
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
catch {
|
|
850
|
+
// Git not available or not a repo
|
|
851
|
+
}
|
|
852
|
+
return {
|
|
853
|
+
changedFiles,
|
|
854
|
+
newChunks,
|
|
855
|
+
skipped,
|
|
856
|
+
timeMs: performance.now() - startTime,
|
|
857
|
+
};
|
|
858
|
+
}
|
|
859
|
+
/**
|
|
860
|
+
* Smart chunking - split code by semantic boundaries
|
|
861
|
+
* (functions, classes, etc.) instead of fixed size
|
|
862
|
+
*/
|
|
863
|
+
semanticChunk(content, fileType) {
|
|
864
|
+
const chunks = [];
|
|
865
|
+
// TypeScript/JavaScript patterns
|
|
866
|
+
if (['.ts', '.tsx', '.js', '.jsx'].some(ext => fileType.endsWith(ext))) {
|
|
867
|
+
// Split on function/class/interface boundaries
|
|
868
|
+
const patterns = [
|
|
869
|
+
/^(export\s+)?(async\s+)?function\s+\w+/gm,
|
|
870
|
+
/^(export\s+)?class\s+\w+/gm,
|
|
871
|
+
/^(export\s+)?interface\s+\w+/gm,
|
|
872
|
+
/^(export\s+)?type\s+\w+/gm,
|
|
873
|
+
/^(export\s+)?const\s+\w+\s*=/gm,
|
|
874
|
+
];
|
|
875
|
+
let lastIndex = 0;
|
|
876
|
+
const boundaries = [0];
|
|
877
|
+
for (const pattern of patterns) {
|
|
878
|
+
let match;
|
|
879
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
880
|
+
boundaries.push(match.index);
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
boundaries.push(content.length);
|
|
884
|
+
boundaries.sort((a, b) => a - b);
|
|
885
|
+
// Extract chunks between boundaries
|
|
886
|
+
for (let i = 0; i < boundaries.length - 1; i++) {
|
|
887
|
+
const chunk = content.slice(boundaries[i], boundaries[i + 1]).trim();
|
|
888
|
+
if (chunk.length > 20 && chunk.length < 2000) {
|
|
889
|
+
chunks.push(chunk);
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
}
|
|
893
|
+
// Python patterns
|
|
894
|
+
else if (fileType.endsWith('.py')) {
|
|
895
|
+
const patterns = [
|
|
896
|
+
/^(async\s+)?def\s+\w+/gm,
|
|
897
|
+
/^class\s+\w+/gm,
|
|
898
|
+
];
|
|
899
|
+
const boundaries = [0];
|
|
900
|
+
for (const pattern of patterns) {
|
|
901
|
+
let match;
|
|
902
|
+
while ((match = pattern.exec(content)) !== null) {
|
|
903
|
+
boundaries.push(match.index);
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
boundaries.push(content.length);
|
|
907
|
+
boundaries.sort((a, b) => a - b);
|
|
908
|
+
for (let i = 0; i < boundaries.length - 1; i++) {
|
|
909
|
+
const chunk = content.slice(boundaries[i], boundaries[i + 1]).trim();
|
|
910
|
+
if (chunk.length > 20 && chunk.length < 2000) {
|
|
911
|
+
chunks.push(chunk);
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
// Markdown - split by headers
|
|
916
|
+
else if (fileType.endsWith('.md')) {
|
|
917
|
+
const sections = content.split(/^#+\s+/gm);
|
|
918
|
+
for (const section of sections) {
|
|
919
|
+
if (section.trim().length > 20) {
|
|
920
|
+
chunks.push(section.trim().slice(0, 1000));
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
// Fallback to fixed-size chunking
|
|
925
|
+
if (chunks.length === 0) {
|
|
926
|
+
const chunkSize = 512;
|
|
927
|
+
const overlap = 64;
|
|
928
|
+
for (let i = 0; i < content.length; i += chunkSize - overlap) {
|
|
929
|
+
const chunk = content.slice(i, i + chunkSize);
|
|
930
|
+
if (chunk.trim().length > 20) {
|
|
931
|
+
chunks.push(chunk);
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
return chunks;
|
|
936
|
+
}
|
|
937
|
+
/**
|
|
938
|
+
* Pretrain with semantic chunking
|
|
939
|
+
* Uses code structure to create meaningful chunks
|
|
940
|
+
*/
|
|
941
|
+
async pretrainSemantic(sources, options = {}) {
|
|
942
|
+
const { batchSize = 32, onProgress } = options;
|
|
943
|
+
const startTime = performance.now();
|
|
944
|
+
let fileCount = 0;
|
|
945
|
+
let chunkCount = 0;
|
|
946
|
+
const allChunks = [];
|
|
947
|
+
try {
|
|
948
|
+
const fs = await import('fs');
|
|
949
|
+
const path = await import('path');
|
|
950
|
+
for (const source of sources) {
|
|
951
|
+
if (fs.existsSync(source)) {
|
|
952
|
+
try {
|
|
953
|
+
const content = fs.readFileSync(source, 'utf-8');
|
|
954
|
+
const ext = path.extname(source);
|
|
955
|
+
const chunks = this.semanticChunk(content, ext);
|
|
956
|
+
allChunks.push(...chunks);
|
|
957
|
+
fileCount++;
|
|
958
|
+
}
|
|
959
|
+
catch {
|
|
960
|
+
// Skip unreadable files
|
|
961
|
+
}
|
|
962
|
+
}
|
|
963
|
+
}
|
|
964
|
+
// Embed and cache all chunks
|
|
965
|
+
for (let i = 0; i < allChunks.length; i += batchSize) {
|
|
966
|
+
const batch = allChunks.slice(i, i + batchSize);
|
|
967
|
+
await this.embedBatch(batch);
|
|
968
|
+
chunkCount += batch.length;
|
|
969
|
+
if (onProgress) {
|
|
970
|
+
onProgress(chunkCount, allChunks.length);
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
catch {
|
|
975
|
+
// Pretrain failed
|
|
976
|
+
}
|
|
977
|
+
return {
|
|
978
|
+
files: fileCount,
|
|
979
|
+
chunks: chunkCount,
|
|
980
|
+
timeMs: performance.now() - startTime,
|
|
981
|
+
};
|
|
982
|
+
}
|
|
983
|
+
/**
|
|
984
|
+
* Priority pretrain - cache most frequently used patterns first
|
|
985
|
+
* Tracks access patterns and prioritizes high-frequency queries
|
|
986
|
+
*/
|
|
987
|
+
accessCounts = new Map();
|
|
988
|
+
recordAccess(text) {
|
|
989
|
+
this.accessCounts.set(text, (this.accessCounts.get(text) || 0) + 1);
|
|
990
|
+
}
|
|
991
|
+
getTopPatterns(n = 100) {
|
|
992
|
+
return Array.from(this.accessCounts.entries())
|
|
993
|
+
.sort((a, b) => b[1] - a[1])
|
|
994
|
+
.slice(0, n)
|
|
995
|
+
.map(([text]) => text);
|
|
996
|
+
}
|
|
997
|
+
async pretrainPriority(n = 100) {
|
|
998
|
+
const topPatterns = this.getTopPatterns(n);
|
|
999
|
+
const startTime = performance.now();
|
|
1000
|
+
if (topPatterns.length > 0) {
|
|
1001
|
+
await this.embedBatch(topPatterns);
|
|
1002
|
+
}
|
|
1003
|
+
return {
|
|
1004
|
+
cached: topPatterns.length,
|
|
1005
|
+
timeMs: performance.now() - startTime,
|
|
1006
|
+
};
|
|
1007
|
+
}
|
|
1008
|
+
/**
|
|
1009
|
+
* Warmup cache on session start
|
|
1010
|
+
* Combines code patterns + recent repo changes
|
|
1011
|
+
*/
|
|
1012
|
+
async warmup(repoPath = '.') {
|
|
1013
|
+
const startTime = performance.now();
|
|
1014
|
+
// First: load common patterns
|
|
1015
|
+
const patternResult = await this.pretrainCodePatterns();
|
|
1016
|
+
// Second: load recent git changes
|
|
1017
|
+
const incrementalResult = await this.pretrainIncremental({
|
|
1018
|
+
since: 'HEAD~5',
|
|
1019
|
+
repoPath,
|
|
1020
|
+
});
|
|
1021
|
+
return {
|
|
1022
|
+
patterns: patternResult.cached,
|
|
1023
|
+
recentChanges: incrementalResult.newChunks,
|
|
1024
|
+
timeMs: performance.now() - startTime,
|
|
1025
|
+
};
|
|
1026
|
+
}
|
|
1027
|
+
/**
|
|
1028
|
+
* Intelligent pretrain using ruvector worker pool
|
|
1029
|
+
* Analyzes repo structure, code patterns, and prepares cache
|
|
1030
|
+
* Uses parallel workers for maximum throughput
|
|
1031
|
+
*/
|
|
1032
|
+
async pretrainIntelligent(options = {}) {
|
|
1033
|
+
const { repoPath = '.', parallel = true, onProgress } = options;
|
|
1034
|
+
const startTime = performance.now();
|
|
1035
|
+
const stages = {
|
|
1036
|
+
codePatterns: { count: 0, timeMs: 0 },
|
|
1037
|
+
astAnalysis: { files: 0, functions: 0, timeMs: 0 },
|
|
1038
|
+
gitHistory: { commits: 0, hotFiles: 0, timeMs: 0 },
|
|
1039
|
+
dependencies: { modules: 0, imports: 0, timeMs: 0 },
|
|
1040
|
+
semanticChunks: { chunks: 0, timeMs: 0 },
|
|
1041
|
+
};
|
|
1042
|
+
let totalCached = 0;
|
|
1043
|
+
try {
|
|
1044
|
+
// Stage 1: Code patterns (common programming patterns)
|
|
1045
|
+
onProgress?.('codePatterns', 0);
|
|
1046
|
+
const stage1Start = performance.now();
|
|
1047
|
+
const patternResult = await this.pretrainCodePatterns();
|
|
1048
|
+
stages.codePatterns = {
|
|
1049
|
+
count: patternResult.cached,
|
|
1050
|
+
timeMs: performance.now() - stage1Start,
|
|
1051
|
+
};
|
|
1052
|
+
totalCached += patternResult.cached;
|
|
1053
|
+
onProgress?.('codePatterns', 100);
|
|
1054
|
+
// Stage 2: AST Analysis using ruvector workers (if available)
|
|
1055
|
+
onProgress?.('astAnalysis', 0);
|
|
1056
|
+
const stage2Start = performance.now();
|
|
1057
|
+
try {
|
|
1058
|
+
if (ruvectorModule && parallel) {
|
|
1059
|
+
// Use ruvector's analyzeFilesParallel if available
|
|
1060
|
+
const mod = ruvectorModule;
|
|
1061
|
+
if (mod.analyzeFilesParallel) {
|
|
1062
|
+
const fs = await import('fs');
|
|
1063
|
+
const path = await import('path');
|
|
1064
|
+
// Collect source files
|
|
1065
|
+
const sourceFiles = [];
|
|
1066
|
+
const collectSources = (dir) => {
|
|
1067
|
+
try {
|
|
1068
|
+
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
1069
|
+
for (const entry of entries) {
|
|
1070
|
+
const fullPath = path.join(dir, entry.name);
|
|
1071
|
+
if (entry.isDirectory()) {
|
|
1072
|
+
if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
|
|
1073
|
+
collectSources(fullPath);
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
else if (['.ts', '.tsx', '.js', '.jsx'].some(ext => entry.name.endsWith(ext))) {
|
|
1077
|
+
sourceFiles.push(fullPath);
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
catch { }
|
|
1082
|
+
};
|
|
1083
|
+
collectSources(repoPath);
|
|
1084
|
+
// Analyze in parallel
|
|
1085
|
+
const astResult = await mod.analyzeFilesParallel(sourceFiles.slice(0, 100));
|
|
1086
|
+
stages.astAnalysis = {
|
|
1087
|
+
files: sourceFiles.length,
|
|
1088
|
+
functions: astResult?.functions || 0,
|
|
1089
|
+
timeMs: performance.now() - stage2Start,
|
|
1090
|
+
};
|
|
1091
|
+
// Extract function signatures for caching
|
|
1092
|
+
if (astResult?.signatures) {
|
|
1093
|
+
await this.embedBatch(astResult.signatures.slice(0, 200));
|
|
1094
|
+
totalCached += Math.min(astResult.signatures.length, 200);
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
catch { }
|
|
1100
|
+
onProgress?.('astAnalysis', 100);
|
|
1101
|
+
// Stage 3: Git history analysis (hot files = frequently changed)
|
|
1102
|
+
onProgress?.('gitHistory', 0);
|
|
1103
|
+
const stage3Start = performance.now();
|
|
1104
|
+
try {
|
|
1105
|
+
const { execSync } = await import('child_process');
|
|
1106
|
+
// Get commit count
|
|
1107
|
+
const commitCount = execSync('git rev-list --count HEAD', {
|
|
1108
|
+
cwd: repoPath,
|
|
1109
|
+
encoding: 'utf-8',
|
|
1110
|
+
}).trim();
|
|
1111
|
+
// Get hot files (most frequently changed)
|
|
1112
|
+
const hotFilesOutput = execSync('git log --format="" --name-only -n 100 | sort | uniq -c | sort -rn | head -20', { cwd: repoPath, encoding: 'utf-8' });
|
|
1113
|
+
const hotFiles = hotFilesOutput
|
|
1114
|
+
.split('\n')
|
|
1115
|
+
.filter(l => l.trim())
|
|
1116
|
+
.map(l => l.trim().split(/\s+/).slice(1).join(' '))
|
|
1117
|
+
.filter(f => f);
|
|
1118
|
+
stages.gitHistory = {
|
|
1119
|
+
commits: parseInt(commitCount) || 0,
|
|
1120
|
+
hotFiles: hotFiles.length,
|
|
1121
|
+
timeMs: performance.now() - stage3Start,
|
|
1122
|
+
};
|
|
1123
|
+
// Pretrain hot files
|
|
1124
|
+
if (hotFiles.length > 0) {
|
|
1125
|
+
const fs = await import('fs');
|
|
1126
|
+
const path = await import('path');
|
|
1127
|
+
const validFiles = hotFiles
|
|
1128
|
+
.map(f => path.join(repoPath, f))
|
|
1129
|
+
.filter(f => fs.existsSync(f));
|
|
1130
|
+
if (validFiles.length > 0) {
|
|
1131
|
+
const result = await this.pretrainSemantic(validFiles, { batchSize: 16 });
|
|
1132
|
+
totalCached += result.chunks;
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
}
|
|
1136
|
+
catch { }
|
|
1137
|
+
onProgress?.('gitHistory', 100);
|
|
1138
|
+
// Stage 4: Dependency analysis
|
|
1139
|
+
onProgress?.('dependencies', 0);
|
|
1140
|
+
const stage4Start = performance.now();
|
|
1141
|
+
try {
|
|
1142
|
+
const fs = await import('fs');
|
|
1143
|
+
const path = await import('path');
|
|
1144
|
+
// Parse package.json for dependencies
|
|
1145
|
+
const pkgPath = path.join(repoPath, 'package.json');
|
|
1146
|
+
if (fs.existsSync(pkgPath)) {
|
|
1147
|
+
const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
|
|
1148
|
+
const deps = Object.keys(pkg.dependencies || {});
|
|
1149
|
+
const devDeps = Object.keys(pkg.devDependencies || {});
|
|
1150
|
+
const allDeps = [...deps, ...devDeps];
|
|
1151
|
+
stages.dependencies = {
|
|
1152
|
+
modules: allDeps.length,
|
|
1153
|
+
imports: 0,
|
|
1154
|
+
timeMs: performance.now() - stage4Start,
|
|
1155
|
+
};
|
|
1156
|
+
// Cache dependency names for import resolution
|
|
1157
|
+
if (allDeps.length > 0) {
|
|
1158
|
+
const depPatterns = allDeps.map(d => `import from ${d}`);
|
|
1159
|
+
await this.embedBatch(depPatterns);
|
|
1160
|
+
totalCached += depPatterns.length;
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
catch { }
|
|
1165
|
+
onProgress?.('dependencies', 100);
|
|
1166
|
+
// Stage 5: Semantic chunking with parallel embedding
|
|
1167
|
+
onProgress?.('semanticChunks', 0);
|
|
1168
|
+
const stage5Start = performance.now();
|
|
1169
|
+
try {
|
|
1170
|
+
const incrementalResult = await this.pretrainIncremental({
|
|
1171
|
+
since: 'HEAD~20',
|
|
1172
|
+
repoPath,
|
|
1173
|
+
});
|
|
1174
|
+
stages.semanticChunks = {
|
|
1175
|
+
chunks: incrementalResult.newChunks,
|
|
1176
|
+
timeMs: performance.now() - stage5Start,
|
|
1177
|
+
};
|
|
1178
|
+
totalCached += incrementalResult.newChunks;
|
|
1179
|
+
}
|
|
1180
|
+
catch { }
|
|
1181
|
+
onProgress?.('semanticChunks', 100);
|
|
1182
|
+
}
|
|
1183
|
+
catch (err) {
|
|
1184
|
+
// Pretrain failed, return partial results
|
|
1185
|
+
}
|
|
1186
|
+
return {
|
|
1187
|
+
stages,
|
|
1188
|
+
totalCached,
|
|
1189
|
+
totalTimeMs: performance.now() - startTime,
|
|
1190
|
+
};
|
|
1191
|
+
}
|
|
1192
|
+
/**
|
|
1193
|
+
* Background pretrain - runs in worker if available
|
|
1194
|
+
* Non-blocking, returns immediately with a promise
|
|
1195
|
+
*/
|
|
1196
|
+
pretrainBackground(options = {}) {
|
|
1197
|
+
let cancelled = false;
|
|
1198
|
+
const promise = (async () => {
|
|
1199
|
+
if (cancelled)
|
|
1200
|
+
return;
|
|
1201
|
+
// Run warmup in background
|
|
1202
|
+
await this.warmup(options.repoPath);
|
|
1203
|
+
if (cancelled)
|
|
1204
|
+
return;
|
|
1205
|
+
// Then run intelligent pretrain
|
|
1206
|
+
await this.pretrainIntelligent({
|
|
1207
|
+
...options,
|
|
1208
|
+
parallel: true,
|
|
1209
|
+
});
|
|
1210
|
+
})();
|
|
1211
|
+
return {
|
|
1212
|
+
promise,
|
|
1213
|
+
cancel: () => { cancelled = true; },
|
|
1214
|
+
};
|
|
1215
|
+
}
|
|
1216
|
+
/**
|
|
1217
|
+
* AI-enhanced pretrain using ruvector attention mechanisms
|
|
1218
|
+
* Uses HyperbolicAttention for code structure, MoE for routing
|
|
1219
|
+
*/
|
|
1220
|
+
/**
 * AI-enhanced pretrain using ruvector attention mechanisms.
 * Uses HyperbolicAttention for code structure, MoE for routing.
 *
 * Best-effort pipeline: every sub-stage is wrapped in an empty catch so a
 * missing ruvector feature or unreadable file degrades to a smaller result
 * rather than failing the call.
 *
 * @param {{repoPath?: string, attentionType?: string, onProgress?: Function}} [options]
 *   - repoPath: root directory scanned for code samples (default '.').
 *   - attentionType: 'auto' | 'hyperbolic' | 'moe' | 'graph' (default 'auto').
 *   - onProgress: optional (stage, message) callback.
 * @returns {Promise<{patterns: Array, attention: {type: string, timeMs: number},
 *   predictions: {prefetch: number, confidence: number}, totalCached: number,
 *   totalTimeMs: number}>} summary of what was embedded/cached.
 */
async pretrainWithAI(options = {}) {
    const { repoPath = '.', attentionType = 'auto', onProgress } = options;
    const startTime = performance.now();
    const patterns = [];
    let totalCached = 0;
    let attentionInfo = { type: 'none', timeMs: 0 };
    let predictions = { prefetch: 0, confidence: 0 };
    try {
        // Module handle cached by detectOnnx(); null when ruvector is unavailable.
        const mod = ruvectorModule;
        // Step 1: Determine best attention type for codebase
        onProgress?.('attention', 'Selecting optimal attention mechanism...');
        let selectedAttention = attentionType;
        if (attentionType === 'auto' && mod) {
            // Use getAttentionForUseCase if available
            if (mod.getAttentionForUseCase) {
                const result = await mod.getAttentionForUseCase('code_analysis');
                selectedAttention = result?.type || 'hyperbolic';
            }
            else {
                // Default to hyperbolic for hierarchical code structure
                selectedAttention = 'hyperbolic';
            }
        }
        attentionInfo.type = selectedAttention;
        const attentionStart = performance.now();
        // Step 2: Use attention to identify important code regions
        onProgress?.('analysis', `Using ${selectedAttention} attention for code analysis...`);
        if (mod) {
            // Collect code samples for attention-based analysis
            const fs = await import('fs');
            const path = await import('path');
            const codeSamples = [];
            // Recursive synchronous walk; skips dot-dirs, node_modules and dist,
            // keeps only small (<5000 chars) .ts/.tsx/.js/.jsx files, capped at
            // maxFiles samples total.
            const collectCode = (dir, maxFiles = 50) => {
                if (codeSamples.length >= maxFiles)
                    return;
                try {
                    const entries = fs.readdirSync(dir, { withFileTypes: true });
                    for (const entry of entries) {
                        if (codeSamples.length >= maxFiles)
                            break;
                        const fullPath = path.join(dir, entry.name);
                        if (entry.isDirectory()) {
                            if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
                                collectCode(fullPath, maxFiles);
                            }
                        }
                        else if (['.ts', '.tsx', '.js', '.jsx'].some(ext => entry.name.endsWith(ext))) {
                            try {
                                const content = fs.readFileSync(fullPath, 'utf-8');
                                if (content.length < 5000) {
                                    codeSamples.push(content);
                                }
                            }
                            catch { }
                        }
                    }
                }
                catch { }
            };
            collectCode(repoPath);
            // NOTE(review): codeSamples is populated above but never read below —
            // presumably intended for a future attention-scoring pass; confirm.
            // Step 3: Use attention mechanisms to weight code importance
            if (mod.HyperbolicAttention && selectedAttention === 'hyperbolic') {
                try {
                    // Hyperbolic attention for hierarchical code structure
                    // NOTE(review): the instance below is constructed but not used
                    // afterwards — kept in case the constructor has side effects.
                    const attention = new mod.HyperbolicAttention({ dim: 384 });
                    // Identify structural patterns (classes, functions, imports)
                    const structuralPatterns = [
                        'class definition with constructor',
                        'async function with error handling',
                        'interface with multiple properties',
                        'type with generics',
                        'import statement block',
                        'export default component',
                        'hook implementation useEffect',
                        'API endpoint handler',
                        'database query function',
                        'authentication middleware',
                    ];
                    await this.embedBatch(structuralPatterns);
                    patterns.push({ type: 'structural', count: structuralPatterns.length });
                    totalCached += structuralPatterns.length;
                }
                catch { }
            }
            if (mod.MoEAttention && selectedAttention === 'moe') {
                try {
                    // MoE for routing different code patterns to experts
                    const routingPatterns = [
                        // Expert 1: Frontend
                        'React component with state',
                        'Vue component with props',
                        'CSS styling module',
                        // Expert 2: Backend
                        'Express route handler',
                        'GraphQL resolver',
                        'REST API endpoint',
                        // Expert 3: Data
                        'SQL query builder',
                        'MongoDB aggregation',
                        'Redis cache operation',
                        // Expert 4: Testing
                        'Jest test case',
                        'E2E test scenario',
                        'Mock implementation',
                    ];
                    await this.embedBatch(routingPatterns);
                    patterns.push({ type: 'routing', count: routingPatterns.length });
                    totalCached += routingPatterns.length;
                }
                catch { }
            }
            if (mod.GraphRoPeAttention && selectedAttention === 'graph') {
                try {
                    // Graph attention for dependency understanding
                    const graphPatterns = [
                        'module exports',
                        'circular dependency',
                        'shared utility import',
                        'type re-export',
                        'barrel file index',
                        'lazy import dynamic',
                        'peer dependency',
                        'optional dependency',
                    ];
                    await this.embedBatch(graphPatterns);
                    patterns.push({ type: 'graph', count: graphPatterns.length });
                    totalCached += graphPatterns.length;
                }
                catch { }
            }
            attentionInfo.timeMs = performance.now() - attentionStart;
            // Step 4: FastGRNN for pattern prediction (if available)
            onProgress?.('prediction', 'Training pattern predictor...');
            if (mod.FastGRNN) {
                try {
                    // Use recent access patterns to predict what's needed next
                    const topPatterns = this.getTopPatterns(50);
                    if (topPatterns.length > 0) {
                        // Prefetch predicted patterns
                        const prefetchPatterns = [
                            ...topPatterns.slice(0, 20),
                            // Add related patterns
                            ...topPatterns.slice(0, 10).map(p => `similar to: ${p}`),
                        ];
                        await this.embedBatch(prefetchPatterns);
                        predictions = {
                            prefetch: prefetchPatterns.length,
                            confidence: 0.85, // Estimated based on access history
                        };
                        totalCached += prefetchPatterns.length;
                    }
                }
                catch { }
            }
        }
        // Step 5: Standard warmup
        onProgress?.('warmup', 'Running standard warmup...');
        const warmupResult = await this.warmup(repoPath);
        totalCached += warmupResult.patterns + warmupResult.recentChanges;
        patterns.push({ type: 'warmup', count: warmupResult.patterns + warmupResult.recentChanges });
    }
    catch (err) {
        // AI pretrain failed, continue with basic
        // (partial results accumulated so far are still returned below)
    }
    return {
        patterns,
        attention: attentionInfo,
        predictions,
        totalCached,
        totalTimeMs: performance.now() - startTime,
    };
}
|
|
1392
|
+
/**
|
|
1393
|
+
* Context-aware prefetch using attention
|
|
1394
|
+
* Predicts what embeddings will be needed based on current context
|
|
1395
|
+
*/
|
|
1396
|
+
async prefetchForContext(context) {
|
|
1397
|
+
const startTime = performance.now();
|
|
1398
|
+
let prefetched = 0;
|
|
1399
|
+
let confidence = 0;
|
|
1400
|
+
try {
|
|
1401
|
+
const patterns = [];
|
|
1402
|
+
// Add patterns based on current file type
|
|
1403
|
+
if (context.currentFile) {
|
|
1404
|
+
const ext = context.currentFile.split('.').pop() || '';
|
|
1405
|
+
const filePatterns = {
|
|
1406
|
+
ts: ['TypeScript type checking', 'interface implementation', 'generic types'],
|
|
1407
|
+
tsx: ['React component', 'JSX rendering', 'hook usage'],
|
|
1408
|
+
js: ['JavaScript module', 'CommonJS require', 'ES6 import'],
|
|
1409
|
+
jsx: ['React component', 'JSX element', 'props handling'],
|
|
1410
|
+
py: ['Python function', 'class method', 'import statement'],
|
|
1411
|
+
md: ['documentation', 'README section', 'code example'],
|
|
1412
|
+
};
|
|
1413
|
+
patterns.push(...(filePatterns[ext] || []));
|
|
1414
|
+
}
|
|
1415
|
+
// Add patterns based on task type
|
|
1416
|
+
if (context.taskType) {
|
|
1417
|
+
const taskPatterns = {
|
|
1418
|
+
edit: ['code modification', 'variable rename', 'function update'],
|
|
1419
|
+
review: ['code review', 'bug detection', 'style check'],
|
|
1420
|
+
debug: ['error trace', 'stack analysis', 'variable inspection'],
|
|
1421
|
+
test: ['test case', 'assertion', 'mock setup'],
|
|
1422
|
+
refactor: ['code cleanup', 'pattern extraction', 'abstraction'],
|
|
1423
|
+
};
|
|
1424
|
+
patterns.push(...(taskPatterns[context.taskType] || []));
|
|
1425
|
+
}
|
|
1426
|
+
// Add patterns based on user query similarity
|
|
1427
|
+
if (context.userQuery) {
|
|
1428
|
+
patterns.push(context.userQuery);
|
|
1429
|
+
// Add variations
|
|
1430
|
+
patterns.push(`how to ${context.userQuery}`);
|
|
1431
|
+
patterns.push(`implement ${context.userQuery}`);
|
|
1432
|
+
}
|
|
1433
|
+
if (patterns.length > 0) {
|
|
1434
|
+
await this.embedBatch(patterns);
|
|
1435
|
+
prefetched = patterns.length;
|
|
1436
|
+
confidence = Math.min(0.9, 0.5 + patterns.length * 0.05);
|
|
1437
|
+
}
|
|
1438
|
+
}
|
|
1439
|
+
catch {
|
|
1440
|
+
// Prefetch failed
|
|
1441
|
+
}
|
|
1442
|
+
return {
|
|
1443
|
+
prefetched,
|
|
1444
|
+
confidence,
|
|
1445
|
+
timeMs: performance.now() - startTime,
|
|
1446
|
+
};
|
|
1447
|
+
}
|
|
496
1448
|
}
|
|
497
1449
|
// Export singleton getter
|
|
498
1450
|
export function getEmbeddingService() {
|
|
@@ -505,6 +1457,12 @@ export async function embed(text) {
|
|
|
505
1457
|
/**
 * Embed a batch of texts via the shared singleton service.
 * @param {string[]} texts - Texts to embed in one batched call.
 * @returns {Promise<*>} The service's batch embedding result.
 */
export async function embedBatch(texts) {
    const service = getEmbeddingService();
    return service.embedBatch(texts);
}
|
|
1460
|
+
/**
 * Pretrain the singleton service on built-in code patterns.
 * @returns {Promise<*>} The service's pretrain result.
 */
export async function pretrainCodePatterns() {
    const service = getEmbeddingService();
    return service.pretrainCodePatterns();
}
|
|
1463
|
+
/**
 * Pretrain the singleton service from a repository's contents.
 * @param {string} [repoPath='.'] - Repository root to scan.
 * @returns {Promise<*>} The service's pretrain result.
 */
export async function pretrainFromRepo(repoPath = '.') {
    const service = getEmbeddingService();
    return service.pretrainFromRepo(repoPath);
}
|
|
508
1466
|
/**
 * Compute semantic similarity between two texts via the singleton service.
 * @param {string} text1 - First text.
 * @param {string} text2 - Second text.
 * @returns {Promise<number>} Similarity score from the service.
 */
export async function textSimilarity(text1, text2) {
    const service = getEmbeddingService();
    return service.similarity(text1, text2);
}
|