agentic-flow 2.0.1-alpha.18 → 2.0.1-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/intelligence/EmbeddingService.d.ts +182 -0
- package/dist/intelligence/EmbeddingService.d.ts.map +1 -1
- package/dist/intelligence/EmbeddingService.js +879 -0
- package/dist/intelligence/EmbeddingService.js.map +1 -1
- package/package.json +1 -1
- package/wasm/reasoningbank/reasoningbank_wasm_bg.js +2 -2
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0

@@ -572,6 +572,879 @@ export class EmbeddingService {
         onnxAvailable = null;
         ruvectorModule = null;
     }
+    /**
+     * Pretrain cache with texts from files
+     * Embeds content and stores in persistent cache for fast retrieval
+     *
+     * @param sources - File paths or glob patterns, or array of texts
+     * @param options - Pretrain options
+     * @returns Stats about pretraining
+     */
+    async pretrain(sources, options = {}) {
+        const { batchSize = 32, onProgress, chunkSize = 512, overlapSize = 64, skipCached = true } = options;
+        const startTime = performance.now();
+        let processed = 0;
+        let cached = 0;
+        let skipped = 0;
+        // Resolve texts to embed
+        const texts = [];
+        if (typeof sources === 'string') {
+            sources = [sources];
+        }
+        for (const source of sources) {
+            // Check if it's a file path or glob pattern
+            if (source.includes('/') || source.includes('*') || source.includes('.')) {
+                try {
+                    const fs = await import('fs');
+                    const path = await import('path');
+                    const { glob } = await import('glob').catch(() => ({ glob: null }));
+                    // Handle glob patterns
+                    let files = [];
+                    if (source.includes('*') && glob) {
+                        files = await glob(source);
+                    }
+                    else if (fs.existsSync(source)) {
+                        files = [source];
+                    }
+                    for (const file of files) {
+                        try {
+                            const content = fs.readFileSync(file, 'utf-8');
+                            // Chunk large files
+                            if (content.length > chunkSize * 2) {
+                                for (let i = 0; i < content.length; i += chunkSize - overlapSize) {
+                                    const chunk = content.slice(i, i + chunkSize);
+                                    if (chunk.trim().length > 10) {
+                                        texts.push(chunk);
+                                    }
+                                }
+                            }
+                            else if (content.trim().length > 10) {
+                                texts.push(content);
+                            }
+                        }
+                        catch {
+                            // Skip unreadable files
+                        }
+                    }
+                }
+                catch {
+                    // Treat as plain text if file operations fail
+                    texts.push(source);
+                }
+            }
+            else {
+                texts.push(source);
+            }
+        }
+        // Filter out already cached texts
+        const toEmbed = [];
+        for (const text of texts) {
+            if (skipCached && this.persistentCache?.has(text, this.modelName)) {
+                skipped++;
+            }
+            else {
+                toEmbed.push(text);
+            }
+        }
+        // Embed in batches
+        for (let i = 0; i < toEmbed.length; i += batchSize) {
+            const batch = toEmbed.slice(i, i + batchSize);
+            const embeddings = await this.embedBatch(batch);
+            // Store in persistent cache (embedBatch already handles this for ONNX)
+            cached += embeddings.length;
+            processed += batch.length;
+            if (onProgress) {
+                onProgress(processed, toEmbed.length);
+            }
+        }
+        return {
+            processed,
+            cached,
+            skipped,
+            timeMs: performance.now() - startTime,
+        };
+    }
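Usage sketch for the new pretrain() API. The deep import specifier below is an assumption for illustration; resolve it against the package's actual export map. Note that glob patterns require the optional glob package (it is imported with a catch and silently disabled when absent), while plain file paths and raw strings work without it.

    // Hypothetical ESM usage; specifier assumed, not confirmed by the package docs.
    import { getEmbeddingService } from 'agentic-flow/dist/intelligence/EmbeddingService.js';

    const svc = getEmbeddingService();
    const stats = await svc.pretrain(['src/**/*.ts', 'README.md'], {
        batchSize: 16,
        chunkSize: 512,
        overlapSize: 64,
        onProgress: (done, total) => console.log(`embedded ${done}/${total}`),
    });
    console.log(stats); // { processed, cached, skipped, timeMs }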
+    /**
+     * Pretrain with common programming patterns
+     * Pre-caches embeddings for frequently used code patterns
+     */
+    async pretrainCodePatterns() {
+        const patterns = [
+            // Common programming constructs
+            'function implementation',
+            'class definition',
+            'interface declaration',
+            'type alias',
+            'import statement',
+            'export module',
+            'async await pattern',
+            'promise handling',
+            'error handling try catch',
+            'conditional logic if else',
+            'loop iteration for while',
+            'array map filter reduce',
+            'object destructuring',
+            'spread operator',
+            'rest parameters',
+            // Code operations
+            'refactor code',
+            'fix bug',
+            'add feature',
+            'write tests',
+            'add documentation',
+            'optimize performance',
+            'improve readability',
+            'handle edge cases',
+            'add validation',
+            'implement authentication',
+            // File types
+            'TypeScript file',
+            'JavaScript module',
+            'React component',
+            'Vue component',
+            'CSS stylesheet',
+            'JSON configuration',
+            'Markdown documentation',
+            'Python script',
+            'Shell script',
+            'SQL query',
+            // Agent routing patterns
+            'code review task',
+            'architecture design',
+            'testing strategy',
+            'debugging session',
+            'performance analysis',
+            'security audit',
+            'documentation update',
+            'API design',
+            'database schema',
+            'deployment configuration',
+        ];
+        const startTime = performance.now();
+        const embeddings = await this.embedBatch(patterns);
+        return {
+            cached: embeddings.length,
+            timeMs: performance.now() - startTime,
+        };
+    }
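The pattern list above is fixed (45 entries spanning constructs, operations, file types, and agent-routing phrases), so the call is a cheap one-shot warmup:

    // Pre-cache the built-in patterns; returns { cached, timeMs }.
    const { cached, timeMs } = await getEmbeddingService().pretrainCodePatterns();
    console.log(`cached ${cached} patterns in ${timeMs.toFixed(1)}ms`);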
+    /**
+     * Pretrain from repository structure
+     * Analyzes file names and paths to pre-cache common patterns
+     */
+    async pretrainFromRepo(repoPath = '.') {
+        const startTime = performance.now();
+        let files = 0;
+        let chunks = 0;
+        try {
+            const fs = await import('fs');
+            const path = await import('path');
+            // Common code file extensions
+            const extensions = ['.ts', '.tsx', '.js', '.jsx', '.py', '.md', '.json'];
+            const walkDir = (dir) => {
+                try {
+                    const entries = fs.readdirSync(dir, { withFileTypes: true });
+                    for (const entry of entries) {
+                        const fullPath = path.join(dir, entry.name);
+                        if (entry.isDirectory()) {
+                            // Skip node_modules, .git, etc.
+                            if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
+                                walkDir(fullPath);
+                            }
+                        }
+                        else if (extensions.some(ext => entry.name.endsWith(ext))) {
+                            return fullPath;
+                        }
+                    }
+                }
+                catch {
+                    // Skip unreadable directories
+                }
+                return null;
+            };
+            // Collect files
+            const filePaths = [];
+            const collectFiles = (dir) => {
+                try {
+                    const entries = fs.readdirSync(dir, { withFileTypes: true });
+                    for (const entry of entries) {
+                        const fullPath = path.join(dir, entry.name);
+                        if (entry.isDirectory()) {
+                            if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
+                                collectFiles(fullPath);
+                            }
+                        }
+                        else if (extensions.some(ext => entry.name.endsWith(ext))) {
+                            filePaths.push(fullPath);
+                        }
+                    }
+                }
+                catch {
+                    // Skip unreadable
+                }
+            };
+            collectFiles(repoPath);
+            files = filePaths.length;
+            // Pretrain from collected files
+            if (filePaths.length > 0) {
+                const result = await this.pretrain(filePaths, {
+                    batchSize: 16,
+                    chunkSize: 512,
+                    overlapSize: 64,
+                });
+                chunks = result.cached;
+            }
+        }
+        catch (err) {
+            // Repository analysis failed
+        }
+        return {
+            files,
+            chunks,
+            timeMs: performance.now() - startTime,
+        };
+    }
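Note that walkDir above is declared but never called (collectFiles performs the actual traversal; walkDir's early return would also stop at the first matching file in a directory). Usage sketch, continuing the import assumption above:

    // Walk a repo, chunk every .ts/.tsx/.js/.jsx/.py/.md/.json file, and cache embeddings.
    const repoStats = await getEmbeddingService().pretrainFromRepo('./my-project');
    console.log(repoStats); // { files, chunks, timeMs }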
+    /**
+     * Incremental pretrain - only process changed files since last run
+     * Uses git diff to detect modified files
+     */
+    async pretrainIncremental(options = {}) {
+        const { since = 'HEAD~10', repoPath = '.' } = options;
+        const startTime = performance.now();
+        let changedFiles = 0;
+        let newChunks = 0;
+        let skipped = 0;
+        try {
+            const { execSync } = await import('child_process');
+            const path = await import('path');
+            const fs = await import('fs');
+            // Get changed files from git
+            const gitOutput = execSync(`git diff --name-only ${since}`, {
+                cwd: repoPath,
+                encoding: 'utf-8',
+            });
+            const changedPaths = gitOutput
+                .split('\n')
+                .filter(f => f.trim())
+                .map(f => path.join(repoPath, f))
+                .filter(f => {
+                    try {
+                        return fs.existsSync(f) && fs.statSync(f).isFile();
+                    }
+                    catch {
+                        return false;
+                    }
+                });
+            changedFiles = changedPaths.length;
+            if (changedPaths.length > 0) {
+                const result = await this.pretrain(changedPaths, {
+                    batchSize: 16,
+                    chunkSize: 512,
+                    overlapSize: 64,
+                    skipCached: true,
+                });
+                newChunks = result.cached;
+                skipped = result.skipped;
+            }
+        }
+        catch {
+            // Git not available or not a repo
+        }
+        return {
+            changedFiles,
+            newChunks,
+            skipped,
+            timeMs: performance.now() - startTime,
+        };
+    }
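File discovery shells out to `git diff --name-only <since>`, so outside a git work tree the catch swallows the error and the method returns zeros. Sketch:

    // Re-embed only files changed in the last 3 commits; already-cached chunks are skipped.
    const inc = await getEmbeddingService().pretrainIncremental({ since: 'HEAD~3', repoPath: '.' });
    console.log(inc); // { changedFiles, newChunks, skipped, timeMs }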
+    /**
+     * Smart chunking - split code by semantic boundaries
+     * (functions, classes, etc.) instead of fixed size
+     */
+    semanticChunk(content, fileType) {
+        const chunks = [];
+        // TypeScript/JavaScript patterns
+        if (['.ts', '.tsx', '.js', '.jsx'].some(ext => fileType.endsWith(ext))) {
+            // Split on function/class/interface boundaries
+            const patterns = [
+                /^(export\s+)?(async\s+)?function\s+\w+/gm,
+                /^(export\s+)?class\s+\w+/gm,
+                /^(export\s+)?interface\s+\w+/gm,
+                /^(export\s+)?type\s+\w+/gm,
+                /^(export\s+)?const\s+\w+\s*=/gm,
+            ];
+            let lastIndex = 0;
+            const boundaries = [0];
+            for (const pattern of patterns) {
+                let match;
+                while ((match = pattern.exec(content)) !== null) {
+                    boundaries.push(match.index);
+                }
+            }
+            boundaries.push(content.length);
+            boundaries.sort((a, b) => a - b);
+            // Extract chunks between boundaries
+            for (let i = 0; i < boundaries.length - 1; i++) {
+                const chunk = content.slice(boundaries[i], boundaries[i + 1]).trim();
+                if (chunk.length > 20 && chunk.length < 2000) {
+                    chunks.push(chunk);
+                }
+            }
+        }
+        // Python patterns
+        else if (fileType.endsWith('.py')) {
+            const patterns = [
+                /^(async\s+)?def\s+\w+/gm,
+                /^class\s+\w+/gm,
+            ];
+            const boundaries = [0];
+            for (const pattern of patterns) {
+                let match;
+                while ((match = pattern.exec(content)) !== null) {
+                    boundaries.push(match.index);
+                }
+            }
+            boundaries.push(content.length);
+            boundaries.sort((a, b) => a - b);
+            for (let i = 0; i < boundaries.length - 1; i++) {
+                const chunk = content.slice(boundaries[i], boundaries[i + 1]).trim();
+                if (chunk.length > 20 && chunk.length < 2000) {
+                    chunks.push(chunk);
+                }
+            }
+        }
+        // Markdown - split by headers
+        else if (fileType.endsWith('.md')) {
+            const sections = content.split(/^#+\s+/gm);
+            for (const section of sections) {
+                if (section.trim().length > 20) {
+                    chunks.push(section.trim().slice(0, 1000));
+                }
+            }
+        }
+        // Fallback to fixed-size chunking
+        if (chunks.length === 0) {
+            const chunkSize = 512;
+            const overlap = 64;
+            for (let i = 0; i < content.length; i += chunkSize - overlap) {
+                const chunk = content.slice(i, i + chunkSize);
+                if (chunk.trim().length > 20) {
+                    chunks.push(chunk);
+                }
+            }
+        }
+        return chunks;
+    }
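The boundary regexes are anchored with ^ in multiline mode, so only unindented (top-level) declarations start new chunks; lastIndex is declared but unused. A direct-call sketch:

    // Split a TypeScript source string on top-level declaration boundaries.
    const src = [
        'export function a() { return 1; }',
        'export class B { run() {} }',
        'export type C = string;',
    ].join('\n');
    const chunks = getEmbeddingService().semanticChunk(src, '.ts');
    console.log(chunks.length); // one chunk per top-level declaration here (3)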
+    /**
+     * Pretrain with semantic chunking
+     * Uses code structure to create meaningful chunks
+     */
+    async pretrainSemantic(sources, options = {}) {
+        const { batchSize = 32, onProgress } = options;
+        const startTime = performance.now();
+        let fileCount = 0;
+        let chunkCount = 0;
+        const allChunks = [];
+        try {
+            const fs = await import('fs');
+            const path = await import('path');
+            for (const source of sources) {
+                if (fs.existsSync(source)) {
+                    try {
+                        const content = fs.readFileSync(source, 'utf-8');
+                        const ext = path.extname(source);
+                        const chunks = this.semanticChunk(content, ext);
+                        allChunks.push(...chunks);
+                        fileCount++;
+                    }
+                    catch {
+                        // Skip unreadable files
+                    }
+                }
+            }
+            // Embed and cache all chunks
+            for (let i = 0; i < allChunks.length; i += batchSize) {
+                const batch = allChunks.slice(i, i + batchSize);
+                await this.embedBatch(batch);
+                chunkCount += batch.length;
+                if (onProgress) {
+                    onProgress(chunkCount, allChunks.length);
+                }
+            }
+        }
+        catch {
+            // Pretrain failed
+        }
+        return {
+            files: fileCount,
+            chunks: chunkCount,
+            timeMs: performance.now() - startTime,
+        };
+    }
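Unlike pretrain(), this variant takes concrete file paths (no glob expansion) and chunks by declaration structure rather than fixed windows. Sketch:

    // Semantic-chunk two files and cache the resulting embeddings.
    const sem = await getEmbeddingService().pretrainSemantic(
        ['src/index.ts', 'docs/guide.md'],
        { batchSize: 16, onProgress: (done, total) => console.log(`${done}/${total}`) },
    );
    console.log(sem); // { files, chunks, timeMs }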
+    /**
+     * Priority pretrain - cache most frequently used patterns first
+     * Tracks access patterns and prioritizes high-frequency queries
+     */
+    accessCounts = new Map();
+    recordAccess(text) {
+        this.accessCounts.set(text, (this.accessCounts.get(text) || 0) + 1);
+    }
+    getTopPatterns(n = 100) {
+        return Array.from(this.accessCounts.entries())
+            .sort((a, b) => b[1] - a[1])
+            .slice(0, n)
+            .map(([text]) => text);
+    }
+    async pretrainPriority(n = 100) {
+        const topPatterns = this.getTopPatterns(n);
+        const startTime = performance.now();
+        if (topPatterns.length > 0) {
+            await this.embedBatch(topPatterns);
+        }
+        return {
+            cached: topPatterns.length,
+            timeMs: performance.now() - startTime,
+        };
+    }
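recordAccess is the hook call sites use to feed the frequency table; pretrainPriority then re-embeds the hottest entries. Sketch:

    // Track query frequency, then warm the cache with the most-used texts.
    const svc = getEmbeddingService();
    svc.recordAccess('refactor code');
    svc.recordAccess('refactor code');
    svc.recordAccess('write tests');
    const prio = await svc.pretrainPriority(10); // embeds up to the 10 most-accessed texts
    console.log(prio); // { cached, timeMs }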
+    /**
+     * Warmup cache on session start
+     * Combines code patterns + recent repo changes
+     */
+    async warmup(repoPath = '.') {
+        const startTime = performance.now();
+        // First: load common patterns
+        const patternResult = await this.pretrainCodePatterns();
+        // Second: load recent git changes
+        const incrementalResult = await this.pretrainIncremental({
+            since: 'HEAD~5',
+            repoPath,
+        });
+        return {
+            patterns: patternResult.cached,
+            recentChanges: incrementalResult.newChunks,
+            timeMs: performance.now() - startTime,
+        };
+    }
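A session-start sketch combining the two stages warmup wraps:

    // Built-in patterns plus chunks from files changed in the last 5 commits.
    const w = await getEmbeddingService().warmup('.');
    console.log(`patterns=${w.patterns} recentChanges=${w.recentChanges} in ${w.timeMs.toFixed(0)}ms`);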
+    /**
+     * Intelligent pretrain using ruvector worker pool
+     * Analyzes repo structure, code patterns, and prepares cache
+     * Uses parallel workers for maximum throughput
+     */
+    async pretrainIntelligent(options = {}) {
+        const { repoPath = '.', parallel = true, onProgress } = options;
+        const startTime = performance.now();
+        const stages = {
+            codePatterns: { count: 0, timeMs: 0 },
+            astAnalysis: { files: 0, functions: 0, timeMs: 0 },
+            gitHistory: { commits: 0, hotFiles: 0, timeMs: 0 },
+            dependencies: { modules: 0, imports: 0, timeMs: 0 },
+            semanticChunks: { chunks: 0, timeMs: 0 },
+        };
+        let totalCached = 0;
+        try {
+            // Stage 1: Code patterns (common programming patterns)
+            onProgress?.('codePatterns', 0);
+            const stage1Start = performance.now();
+            const patternResult = await this.pretrainCodePatterns();
+            stages.codePatterns = {
+                count: patternResult.cached,
+                timeMs: performance.now() - stage1Start,
+            };
+            totalCached += patternResult.cached;
+            onProgress?.('codePatterns', 100);
+            // Stage 2: AST Analysis using ruvector workers (if available)
+            onProgress?.('astAnalysis', 0);
+            const stage2Start = performance.now();
+            try {
+                if (ruvectorModule && parallel) {
+                    // Use ruvector's analyzeFilesParallel if available
+                    const mod = ruvectorModule;
+                    if (mod.analyzeFilesParallel) {
+                        const fs = await import('fs');
+                        const path = await import('path');
+                        // Collect source files
+                        const sourceFiles = [];
+                        const collectSources = (dir) => {
+                            try {
+                                const entries = fs.readdirSync(dir, { withFileTypes: true });
+                                for (const entry of entries) {
+                                    const fullPath = path.join(dir, entry.name);
+                                    if (entry.isDirectory()) {
+                                        if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
+                                            collectSources(fullPath);
+                                        }
+                                    }
+                                    else if (['.ts', '.tsx', '.js', '.jsx'].some(ext => entry.name.endsWith(ext))) {
+                                        sourceFiles.push(fullPath);
+                                    }
+                                }
+                            }
+                            catch { }
+                        };
+                        collectSources(repoPath);
+                        // Analyze in parallel
+                        const astResult = await mod.analyzeFilesParallel(sourceFiles.slice(0, 100));
+                        stages.astAnalysis = {
+                            files: sourceFiles.length,
+                            functions: astResult?.functions || 0,
+                            timeMs: performance.now() - stage2Start,
+                        };
+                        // Extract function signatures for caching
+                        if (astResult?.signatures) {
+                            await this.embedBatch(astResult.signatures.slice(0, 200));
+                            totalCached += Math.min(astResult.signatures.length, 200);
+                        }
+                    }
+                }
+            }
+            catch { }
+            onProgress?.('astAnalysis', 100);
+            // Stage 3: Git history analysis (hot files = frequently changed)
+            onProgress?.('gitHistory', 0);
+            const stage3Start = performance.now();
+            try {
+                const { execSync } = await import('child_process');
+                // Get commit count
+                const commitCount = execSync('git rev-list --count HEAD', {
+                    cwd: repoPath,
+                    encoding: 'utf-8',
+                }).trim();
+                // Get hot files (most frequently changed)
+                const hotFilesOutput = execSync('git log --format="" --name-only -n 100 | sort | uniq -c | sort -rn | head -20', { cwd: repoPath, encoding: 'utf-8' });
+                const hotFiles = hotFilesOutput
+                    .split('\n')
+                    .filter(l => l.trim())
+                    .map(l => l.trim().split(/\s+/).slice(1).join(' '))
+                    .filter(f => f);
+                stages.gitHistory = {
+                    commits: parseInt(commitCount) || 0,
+                    hotFiles: hotFiles.length,
+                    timeMs: performance.now() - stage3Start,
+                };
+                // Pretrain hot files
+                if (hotFiles.length > 0) {
+                    const fs = await import('fs');
+                    const path = await import('path');
+                    const validFiles = hotFiles
+                        .map(f => path.join(repoPath, f))
+                        .filter(f => fs.existsSync(f));
+                    if (validFiles.length > 0) {
+                        const result = await this.pretrainSemantic(validFiles, { batchSize: 16 });
+                        totalCached += result.chunks;
+                    }
+                }
+            }
+            catch { }
+            onProgress?.('gitHistory', 100);
+            // Stage 4: Dependency analysis
+            onProgress?.('dependencies', 0);
+            const stage4Start = performance.now();
+            try {
+                const fs = await import('fs');
+                const path = await import('path');
+                // Parse package.json for dependencies
+                const pkgPath = path.join(repoPath, 'package.json');
+                if (fs.existsSync(pkgPath)) {
+                    const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
+                    const deps = Object.keys(pkg.dependencies || {});
+                    const devDeps = Object.keys(pkg.devDependencies || {});
+                    const allDeps = [...deps, ...devDeps];
+                    stages.dependencies = {
+                        modules: allDeps.length,
+                        imports: 0,
+                        timeMs: performance.now() - stage4Start,
+                    };
+                    // Cache dependency names for import resolution
+                    if (allDeps.length > 0) {
+                        const depPatterns = allDeps.map(d => `import from ${d}`);
+                        await this.embedBatch(depPatterns);
+                        totalCached += depPatterns.length;
+                    }
+                }
+            }
+            catch { }
+            onProgress?.('dependencies', 100);
+            // Stage 5: Semantic chunking with parallel embedding
+            onProgress?.('semanticChunks', 0);
+            const stage5Start = performance.now();
+            try {
+                const incrementalResult = await this.pretrainIncremental({
+                    since: 'HEAD~20',
+                    repoPath,
+                });
+                stages.semanticChunks = {
+                    chunks: incrementalResult.newChunks,
+                    timeMs: performance.now() - stage5Start,
+                };
+                totalCached += incrementalResult.newChunks;
+            }
+            catch { }
+            onProgress?.('semanticChunks', 100);
+        }
+        catch (err) {
+            // Pretrain failed, return partial results
+        }
+        return {
+            stages,
+            totalCached,
+            totalTimeMs: performance.now() - startTime,
+        };
+    }
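Stages 2 through 5 are individually try/catch-wrapped, so a missing git binary or an absent ruvector module zeroes that stage instead of failing the whole call. Sketch with the per-stage progress callback:

    // Run the five-stage pretrain and log stage transitions.
    const intel = await getEmbeddingService().pretrainIntelligent({
        repoPath: '.',
        parallel: true,
        onProgress: (stage, pct) => console.log(`[pretrain] ${stage}: ${pct}%`),
    });
    console.log(intel.stages, intel.totalCached, intel.totalTimeMs);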
+    /**
+     * Background pretrain - runs in worker if available
+     * Non-blocking, returns immediately with a promise
+     */
+    pretrainBackground(options = {}) {
+        let cancelled = false;
+        const promise = (async () => {
+            if (cancelled)
+                return;
+            // Run warmup in background
+            await this.warmup(options.repoPath);
+            if (cancelled)
+                return;
+            // Then run intelligent pretrain
+            await this.pretrainIntelligent({
+                ...options,
+                parallel: true,
+            });
+        })();
+        return {
+            promise,
+            cancel: () => { cancelled = true; },
+        };
+    }
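Despite the name, the work runs on the caller's event loop (an immediately invoked async function, not a worker thread), and cancel() only takes effect at the two checkpoints between phases; it does not abort a phase in flight. Sketch:

    // Kick off warmup + intelligent pretrain without awaiting.
    const job = getEmbeddingService().pretrainBackground({ repoPath: '.' });
    setTimeout(() => job.cancel(), 5000); // honored at the next phase boundary
    await job.promise; // resolves when both phases finish or the job bails out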
+    /**
+     * AI-enhanced pretrain using ruvector attention mechanisms
+     * Uses HyperbolicAttention for code structure, MoE for routing
+     */
+    async pretrainWithAI(options = {}) {
+        const { repoPath = '.', attentionType = 'auto', onProgress } = options;
+        const startTime = performance.now();
+        const patterns = [];
+        let totalCached = 0;
+        let attentionInfo = { type: 'none', timeMs: 0 };
+        let predictions = { prefetch: 0, confidence: 0 };
+        try {
+            const mod = ruvectorModule;
+            // Step 1: Determine best attention type for codebase
+            onProgress?.('attention', 'Selecting optimal attention mechanism...');
+            let selectedAttention = attentionType;
+            if (attentionType === 'auto' && mod) {
+                // Use getAttentionForUseCase if available
+                if (mod.getAttentionForUseCase) {
+                    const result = await mod.getAttentionForUseCase('code_analysis');
+                    selectedAttention = result?.type || 'hyperbolic';
+                }
+                else {
+                    // Default to hyperbolic for hierarchical code structure
+                    selectedAttention = 'hyperbolic';
+                }
+            }
+            attentionInfo.type = selectedAttention;
+            const attentionStart = performance.now();
+            // Step 2: Use attention to identify important code regions
+            onProgress?.('analysis', `Using ${selectedAttention} attention for code analysis...`);
+            if (mod) {
+                // Collect code samples for attention-based analysis
+                const fs = await import('fs');
+                const path = await import('path');
+                const codeSamples = [];
+                const collectCode = (dir, maxFiles = 50) => {
+                    if (codeSamples.length >= maxFiles)
+                        return;
+                    try {
+                        const entries = fs.readdirSync(dir, { withFileTypes: true });
+                        for (const entry of entries) {
+                            if (codeSamples.length >= maxFiles)
+                                break;
+                            const fullPath = path.join(dir, entry.name);
+                            if (entry.isDirectory()) {
+                                if (!entry.name.startsWith('.') && entry.name !== 'node_modules' && entry.name !== 'dist') {
+                                    collectCode(fullPath, maxFiles);
+                                }
+                            }
+                            else if (['.ts', '.tsx', '.js', '.jsx'].some(ext => entry.name.endsWith(ext))) {
+                                try {
+                                    const content = fs.readFileSync(fullPath, 'utf-8');
+                                    if (content.length < 5000) {
+                                        codeSamples.push(content);
+                                    }
+                                }
+                                catch { }
+                            }
+                        }
+                    }
+                    catch { }
+                };
+                collectCode(repoPath);
+                // Step 3: Use attention mechanisms to weight code importance
+                if (mod.HyperbolicAttention && selectedAttention === 'hyperbolic') {
+                    try {
+                        // Hyperbolic attention for hierarchical code structure
+                        const attention = new mod.HyperbolicAttention({ dim: 384 });
+                        // Identify structural patterns (classes, functions, imports)
+                        const structuralPatterns = [
+                            'class definition with constructor',
+                            'async function with error handling',
+                            'interface with multiple properties',
+                            'type with generics',
+                            'import statement block',
+                            'export default component',
+                            'hook implementation useEffect',
+                            'API endpoint handler',
+                            'database query function',
+                            'authentication middleware',
+                        ];
+                        await this.embedBatch(structuralPatterns);
+                        patterns.push({ type: 'structural', count: structuralPatterns.length });
+                        totalCached += structuralPatterns.length;
+                    }
+                    catch { }
+                }
+                if (mod.MoEAttention && selectedAttention === 'moe') {
+                    try {
+                        // MoE for routing different code patterns to experts
+                        const routingPatterns = [
+                            // Expert 1: Frontend
+                            'React component with state',
+                            'Vue component with props',
+                            'CSS styling module',
+                            // Expert 2: Backend
+                            'Express route handler',
+                            'GraphQL resolver',
+                            'REST API endpoint',
+                            // Expert 3: Data
+                            'SQL query builder',
+                            'MongoDB aggregation',
+                            'Redis cache operation',
+                            // Expert 4: Testing
+                            'Jest test case',
+                            'E2E test scenario',
+                            'Mock implementation',
+                        ];
+                        await this.embedBatch(routingPatterns);
+                        patterns.push({ type: 'routing', count: routingPatterns.length });
+                        totalCached += routingPatterns.length;
+                    }
+                    catch { }
+                }
+                if (mod.GraphRoPeAttention && selectedAttention === 'graph') {
+                    try {
+                        // Graph attention for dependency understanding
+                        const graphPatterns = [
+                            'module exports',
+                            'circular dependency',
+                            'shared utility import',
+                            'type re-export',
+                            'barrel file index',
+                            'lazy import dynamic',
+                            'peer dependency',
+                            'optional dependency',
+                        ];
+                        await this.embedBatch(graphPatterns);
+                        patterns.push({ type: 'graph', count: graphPatterns.length });
+                        totalCached += graphPatterns.length;
+                    }
+                    catch { }
+                }
+                attentionInfo.timeMs = performance.now() - attentionStart;
+                // Step 4: FastGRNN for pattern prediction (if available)
+                onProgress?.('prediction', 'Training pattern predictor...');
+                if (mod.FastGRNN) {
+                    try {
+                        // Use recent access patterns to predict what's needed next
+                        const topPatterns = this.getTopPatterns(50);
+                        if (topPatterns.length > 0) {
+                            // Prefetch predicted patterns
+                            const prefetchPatterns = [
+                                ...topPatterns.slice(0, 20),
+                                // Add related patterns
+                                ...topPatterns.slice(0, 10).map(p => `similar to: ${p}`),
+                            ];
+                            await this.embedBatch(prefetchPatterns);
+                            predictions = {
+                                prefetch: prefetchPatterns.length,
+                                confidence: 0.85, // Estimated based on access history
+                            };
+                            totalCached += prefetchPatterns.length;
+                        }
+                    }
+                    catch { }
+                }
+            }
+            // Step 5: Standard warmup
+            onProgress?.('warmup', 'Running standard warmup...');
+            const warmupResult = await this.warmup(repoPath);
+            totalCached += warmupResult.patterns + warmupResult.recentChanges;
+            patterns.push({ type: 'warmup', count: warmupResult.patterns + warmupResult.recentChanges });
+        }
+        catch (err) {
+            // AI pretrain failed, continue with basic
+        }
+        return {
+            patterns,
+            attention: attentionInfo,
+            predictions,
+            totalCached,
+            totalTimeMs: performance.now() - startTime,
+        };
+    }
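The attention classes are probed mostly for availability — the HyperbolicAttention instance is constructed but not otherwise consulted — and the cached artifacts are still plain text patterns fed through embedBatch. Sketch:

    // AI-assisted warmup; degrades to the standard warmup when ruvector is unavailable.
    const ai = await getEmbeddingService().pretrainWithAI({
        repoPath: '.',
        attentionType: 'auto', // or 'hyperbolic' | 'moe' | 'graph'
        onProgress: (step, msg) => console.log(`[ai-pretrain] ${step}: ${msg}`),
    });
    console.log(ai.attention.type, ai.predictions, ai.totalCached);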
+    /**
+     * Context-aware prefetch using attention
+     * Predicts what embeddings will be needed based on current context
+     */
+    async prefetchForContext(context) {
+        const startTime = performance.now();
+        let prefetched = 0;
+        let confidence = 0;
+        try {
+            const patterns = [];
+            // Add patterns based on current file type
+            if (context.currentFile) {
+                const ext = context.currentFile.split('.').pop() || '';
+                const filePatterns = {
+                    ts: ['TypeScript type checking', 'interface implementation', 'generic types'],
+                    tsx: ['React component', 'JSX rendering', 'hook usage'],
+                    js: ['JavaScript module', 'CommonJS require', 'ES6 import'],
+                    jsx: ['React component', 'JSX element', 'props handling'],
+                    py: ['Python function', 'class method', 'import statement'],
+                    md: ['documentation', 'README section', 'code example'],
+                };
+                patterns.push(...(filePatterns[ext] || []));
+            }
+            // Add patterns based on task type
+            if (context.taskType) {
+                const taskPatterns = {
+                    edit: ['code modification', 'variable rename', 'function update'],
+                    review: ['code review', 'bug detection', 'style check'],
+                    debug: ['error trace', 'stack analysis', 'variable inspection'],
+                    test: ['test case', 'assertion', 'mock setup'],
+                    refactor: ['code cleanup', 'pattern extraction', 'abstraction'],
+                };
+                patterns.push(...(taskPatterns[context.taskType] || []));
+            }
+            // Add patterns based on user query similarity
+            if (context.userQuery) {
+                patterns.push(context.userQuery);
+                // Add variations
+                patterns.push(`how to ${context.userQuery}`);
+                patterns.push(`implement ${context.userQuery}`);
+            }
+            if (patterns.length > 0) {
+                await this.embedBatch(patterns);
+                prefetched = patterns.length;
+                confidence = Math.min(0.9, 0.5 + patterns.length * 0.05);
+            }
+        }
+        catch {
+            // Prefetch failed
+        }
+        return {
+            prefetched,
+            confidence,
+            timeMs: performance.now() - startTime,
+        };
+    }
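All three context fields are optional, and the reported confidence is a simple function of pattern count (0.5 + 0.05 per pattern, capped at 0.9), not a model output. Sketch:

    // Prefetch embeddings likely needed for an edit task in a TypeScript file.
    const pf = await getEmbeddingService().prefetchForContext({
        currentFile: 'src/router.ts',
        taskType: 'edit',
        userQuery: 'add route guard',
    });
    console.log(pf); // { prefetched, confidence, timeMs }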
 }
 // Export singleton getter
 export function getEmbeddingService() {

@@ -584,6 +1457,12 @@ export async function embed(text) {
 export async function embedBatch(texts) {
     return getEmbeddingService().embedBatch(texts);
 }
+export async function pretrainCodePatterns() {
+    return getEmbeddingService().pretrainCodePatterns();
+}
+export async function pretrainFromRepo(repoPath = '.') {
+    return getEmbeddingService().pretrainFromRepo(repoPath);
+}
 export async function textSimilarity(text1, text2) {
     return getEmbeddingService().similarity(text1, text2);
 }