ruvector 0.1.81 → 0.1.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +182 -39
- package/dist/core/adaptive-embedder.d.ts +148 -0
- package/dist/core/adaptive-embedder.d.ts.map +1 -0
- package/dist/core/adaptive-embedder.js +594 -0
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +4 -1
- package/package.json +1 -1
package/bin/cli.js
CHANGED
|
@@ -1919,50 +1919,193 @@ program
|
|
|
1919
1919
|
// Embed Command - Generate embeddings
|
|
1920
1920
|
// =============================================================================
|
|
1921
1921
|
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
.option('-t, --text <string>', 'Text to embed')
|
|
1926
|
-
.option('-f, --file <path>', 'File containing text (one per line)')
|
|
1927
|
-
.option('-m, --model <name>', 'Embedding model', 'all-minilm-l6-v2')
|
|
1928
|
-
.option('-o, --output <file>', 'Output file for embeddings')
|
|
1929
|
-
.option('--info', 'Show embedding info')
|
|
1930
|
-
.action(async (options) => {
|
|
1931
|
-
console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════'));
|
|
1932
|
-
console.log(chalk.cyan(' RuVector Embed'));
|
|
1933
|
-
console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n'));
|
|
1922
|
+
// =============================================================================
|
|
1923
|
+
// Embed Command - Generate embeddings (now with ONNX + Adaptive LoRA)
|
|
1924
|
+
// =============================================================================
|
|
1934
1925
|
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1926
|
+
// Parent "embed" command; subcommands below share the ONNX / Adaptive LoRA backends.
const embedCmd = program.command('embed').description('Generate embeddings from text (ONNX + Adaptive LoRA)');

// `embed text <text>`: embed a single string, optionally through the adaptive
// (LoRA) pipeline, and optionally write the result to a JSON file.
embedCmd
  .command('text')
  .description('Embed a text string')
  .argument('<text>', 'Text to embed')
  .option('--adaptive', 'Use adaptive embedder with LoRA')
  .option('--domain <domain>', 'Domain for prototype learning')
  .option('-o, --output <file>', 'Output file for embedding')
  .action(async (text, opts) => {
    try {
      const { performance } = require('perf_hooks');
      const start = performance.now();

      if (opts.adaptive) {
        const { initAdaptiveEmbedder } = require('../dist/core/adaptive-embedder.js');
        const embedder = await initAdaptiveEmbedder();
        const embedding = await embedder.embed(text, { domain: opts.domain });
        const stats = embedder.getStats();

        console.log(chalk.cyan('\n🧠 Adaptive Embedding (ONNX + Micro-LoRA)\n'));
        console.log(chalk.dim(`Text: "${text.slice(0, 60)}..."`));
        console.log(chalk.dim(`Dimension: ${embedding.length}`));
        console.log(chalk.dim(`LoRA rank: ${stats.loraRank} (${stats.loraParams} params)`));
        console.log(chalk.dim(`Prototypes: ${stats.prototypes}`));
        console.log(chalk.dim(`Time: ${(performance.now() - start).toFixed(1)}ms`));

        if (opts.output) {
          fs.writeFileSync(opts.output, JSON.stringify({ text, embedding, stats }, null, 2));
          console.log(chalk.green(`\nSaved to ${opts.output}`));
        }
      } else {
        const { initOnnxEmbedder, embed } = require('../dist/core/onnx-embedder.js');
        await initOnnxEmbedder();
        const result = await embed(text);

        console.log(chalk.cyan('\n📊 ONNX Embedding (all-MiniLM-L6-v2)\n'));
        console.log(chalk.dim(`Text: "${text.slice(0, 60)}..."`));
        console.log(chalk.dim(`Dimension: ${result.embedding.length}`));
        console.log(chalk.dim(`Time: ${(performance.now() - start).toFixed(1)}ms`));

        if (opts.output) {
          fs.writeFileSync(opts.output, JSON.stringify({ text, embedding: result.embedding }, null, 2));
          console.log(chalk.green(`\nSaved to ${opts.output}`));
        }
      }
    } catch (e) {
      console.error(chalk.red('Embedding failed:'), e.message);
      // Fix: propagate failure to the shell instead of silently exiting 0.
      process.exitCode = 1;
    }
  });
|
|
1956
1976
|
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1977
|
+
// `embed adaptive`: maintenance operations on the adaptive embedder —
// inspect stats, run EWC consolidation, reset, or export/import learned weights.
// Flags are not mutually exclusive; each requested action runs in order.
embedCmd
  .command('adaptive')
  .description('Adaptive embedding with Micro-LoRA optimization')
  .option('--stats', 'Show adaptive embedder statistics')
  .option('--consolidate', 'Run EWC consolidation')
  .option('--reset', 'Reset adaptive weights')
  .option('--export <file>', 'Export learned weights')
  .option('--import <file>', 'Import learned weights')
  .action(async (opts) => {
    try {
      const { initAdaptiveEmbedder } = require('../dist/core/adaptive-embedder.js');
      const embedder = await initAdaptiveEmbedder();

      if (opts.stats) {
        const stats = embedder.getStats();
        console.log(chalk.cyan('\n🧠 Adaptive Embedder Statistics\n'));
        console.log(chalk.white('Base Model:'), chalk.dim(stats.baseModel));
        console.log(chalk.white('Dimension:'), chalk.dim(stats.dimension));
        console.log(chalk.white('LoRA Rank:'), chalk.dim(stats.loraRank));
        console.log(chalk.white('LoRA Params:'), chalk.dim(`${stats.loraParams} (~${(stats.loraParams / (stats.dimension * stats.dimension) * 100).toFixed(2)}% of base)`));
        console.log(chalk.white('Adaptations:'), chalk.dim(stats.adaptations));
        console.log(chalk.white('Prototypes:'), chalk.dim(stats.prototypes));
        console.log(chalk.white('Memory Size:'), chalk.dim(stats.memorySize));
        console.log(chalk.white('EWC Consolidations:'), chalk.dim(stats.ewcConsolidations));
        console.log(chalk.white('Contrastive Updates:'), chalk.dim(stats.contrastiveUpdates));
        console.log('');
      }

      if (opts.consolidate) {
        console.log(chalk.yellow('Running EWC consolidation...'));
        await embedder.consolidate();
        console.log(chalk.green('✓ Consolidation complete'));
      }

      if (opts.reset) {
        embedder.reset();
        console.log(chalk.green('✓ Adaptive weights reset'));
      }

      if (opts.export) {
        const data = embedder.export();
        fs.writeFileSync(opts.export, JSON.stringify(data, null, 2));
        console.log(chalk.green(`✓ Exported to ${opts.export}`));
      }

      if (opts.import) {
        const data = JSON.parse(fs.readFileSync(opts.import, 'utf-8'));
        embedder.import(data);
        console.log(chalk.green(`✓ Imported from ${opts.import}`));
      }
    } catch (e) {
      console.error(chalk.red('Error:'), e.message);
      // Fix: a failed maintenance command must not exit 0.
      process.exitCode = 1;
    }
  });
|
|
1964
2031
|
|
|
1965
|
-
|
|
2032
|
+
// `embed benchmark`: times single and batch(4) embedding for the frozen ONNX
// backend vs. the adaptive (ONNX + LoRA) backend and prints the overhead.
embedCmd
  .command('benchmark')
  .description('Benchmark base vs adaptive embeddings')
  .option('--iterations <n>', 'Number of iterations', '10')
  .action(async (opts) => {
    try {
      const { performance } = require('perf_hooks');
      // Fix: always pass a radix to parseInt; NaN/0 falls back to 10.
      const iterations = Number.parseInt(opts.iterations, 10) || 10;

      console.log(chalk.cyan('\n🚀 Embedding Benchmark: Base ONNX vs Adaptive LoRA\n'));

      const testTexts = [
        'This is a test sentence for embedding generation.',
        'The quick brown fox jumps over the lazy dog.',
        'Machine learning models can learn from data.',
        'Vector databases enable semantic search.',
      ];

      // Benchmark base ONNX
      const { initOnnxEmbedder, embed, embedBatch } = require('../dist/core/onnx-embedder.js');
      await initOnnxEmbedder();

      console.log(chalk.yellow('1. Base ONNX Embeddings'));
      const baseStart = performance.now();
      for (let i = 0; i < iterations; i++) {
        await embed(testTexts[i % testTexts.length]);
      }
      const baseTime = (performance.now() - baseStart) / iterations;
      console.log(chalk.dim(`  Single: ${baseTime.toFixed(1)}ms avg`));

      const baseBatchStart = performance.now();
      for (let i = 0; i < Math.ceil(iterations / 4); i++) {
        await embedBatch(testTexts);
      }
      const baseBatchTime = (performance.now() - baseBatchStart) / Math.ceil(iterations / 4);
      console.log(chalk.dim(`  Batch(4): ${baseBatchTime.toFixed(1)}ms avg (${(4000 / baseBatchTime).toFixed(1)}/s)`));

      // Benchmark adaptive
      const { initAdaptiveEmbedder } = require('../dist/core/adaptive-embedder.js');
      const adaptive = await initAdaptiveEmbedder();

      console.log(chalk.yellow('\n2. Adaptive ONNX + LoRA'));
      const adaptStart = performance.now();
      for (let i = 0; i < iterations; i++) {
        await adaptive.embed(testTexts[i % testTexts.length]);
      }
      const adaptTime = (performance.now() - adaptStart) / iterations;
      console.log(chalk.dim(`  Single: ${adaptTime.toFixed(1)}ms avg`));

      const adaptBatchStart = performance.now();
      for (let i = 0; i < Math.ceil(iterations / 4); i++) {
        await adaptive.embedBatch(testTexts);
      }
      const adaptBatchTime = (performance.now() - adaptBatchStart) / Math.ceil(iterations / 4);
      console.log(chalk.dim(`  Batch(4): ${adaptBatchTime.toFixed(1)}ms avg (${(4000 / adaptBatchTime).toFixed(1)}/s)`));

      // Summary
      console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════'));
      console.log(chalk.bold('Summary'));
      console.log(chalk.cyan('═══════════════════════════════════════════════════════════════'));
      const stats = adaptive.getStats();
      console.log(chalk.dim(`\nAdaptive overhead: +${(adaptTime - baseTime).toFixed(1)}ms (+${((adaptTime/baseTime - 1) * 100).toFixed(1)}%)`));
      console.log(chalk.dim(`LoRA params: ${stats.loraParams} (rank ${stats.loraRank})`));
      console.log(chalk.dim(`Memory prototypes: ${stats.prototypes}`));
      console.log(chalk.dim(`Episodic memory: ${stats.memorySize} entries`));

      console.log(chalk.white('\nBenefits of Adaptive:'));
      console.log(chalk.dim('  • Domain-specific fine-tuning via Micro-LoRA'));
      console.log(chalk.dim('  • Contrastive learning from co-edit patterns'));
      console.log(chalk.dim('  • EWC++ prevents catastrophic forgetting'));
      console.log(chalk.dim('  • Prototype-based domain adaptation'));
      console.log(chalk.dim('  • Episodic memory augmentation'));
      console.log('');
    } catch (e) {
      console.error(chalk.red('Benchmark failed:'), e.message);
      if (e.stack) console.error(chalk.dim(e.stack));
      // Fix: report benchmark failure through the exit code.
      process.exitCode = 1;
    }
  });
|
|
1967
2110
|
|
|
1968
2111
|
// =============================================================================
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AdaptiveEmbedder - Micro-LoRA Style Optimization for ONNX Embeddings
|
|
3
|
+
*
|
|
4
|
+
* Applies continual learning techniques to frozen ONNX embeddings:
|
|
5
|
+
*
|
|
6
|
+
* 1. MICRO-LORA ADAPTERS
|
|
7
|
+
* - Low-rank projection layers (rank 2-8) on top of frozen embeddings
|
|
8
|
+
* - Domain-specific fine-tuning with minimal parameters
|
|
9
|
+
* - ~0.1% of base model parameters
|
|
10
|
+
*
|
|
11
|
+
* 2. CONTRASTIVE LEARNING
|
|
12
|
+
* - Files edited together → embeddings closer
|
|
13
|
+
* - Semantic clustering from trajectories
|
|
14
|
+
* - Online learning from user behavior
|
|
15
|
+
*
|
|
16
|
+
* 3. EWC++ (Elastic Weight Consolidation)
|
|
17
|
+
* - Prevents catastrophic forgetting
|
|
18
|
+
* - Consolidates important adaptations
|
|
19
|
+
* - Fisher information regularization
|
|
20
|
+
*
|
|
21
|
+
* 4. MEMORY-AUGMENTED RETRIEVAL
|
|
22
|
+
* - Episodic memory for context-aware embeddings
|
|
23
|
+
* - Attention over past similar embeddings
|
|
24
|
+
* - Domain prototype learning
|
|
25
|
+
*
|
|
26
|
+
* Architecture:
|
|
27
|
+
* ONNX(text) → [frozen 384d] → LoRA_A → LoRA_B → [adapted 384d]
|
|
28
|
+
* (384×r) (r×384)
|
|
29
|
+
*/
|
|
30
|
+
export interface AdaptiveConfig {
|
|
31
|
+
/** LoRA rank (lower = fewer params, higher = more expressive) */
|
|
32
|
+
loraRank?: number;
|
|
33
|
+
/** Learning rate for online updates */
|
|
34
|
+
learningRate?: number;
|
|
35
|
+
/** EWC regularization strength */
|
|
36
|
+
ewcLambda?: number;
|
|
37
|
+
/** Number of domain prototypes to maintain */
|
|
38
|
+
numPrototypes?: number;
|
|
39
|
+
/** Enable contrastive learning from co-edits */
|
|
40
|
+
contrastiveLearning?: boolean;
|
|
41
|
+
/** Temperature for contrastive loss */
|
|
42
|
+
contrastiveTemp?: number;
|
|
43
|
+
/** Memory capacity for episodic retrieval */
|
|
44
|
+
memoryCapacity?: number;
|
|
45
|
+
}
|
|
46
|
+
export interface LoRAWeights {
|
|
47
|
+
A: number[][];
|
|
48
|
+
B: number[][];
|
|
49
|
+
bias?: number[];
|
|
50
|
+
}
|
|
51
|
+
export interface DomainPrototype {
|
|
52
|
+
domain: string;
|
|
53
|
+
centroid: number[];
|
|
54
|
+
count: number;
|
|
55
|
+
variance: number;
|
|
56
|
+
}
|
|
57
|
+
export interface AdaptiveStats {
|
|
58
|
+
baseModel: string;
|
|
59
|
+
dimension: number;
|
|
60
|
+
loraRank: number;
|
|
61
|
+
loraParams: number;
|
|
62
|
+
adaptations: number;
|
|
63
|
+
prototypes: number;
|
|
64
|
+
memorySize: number;
|
|
65
|
+
ewcConsolidations: number;
|
|
66
|
+
contrastiveUpdates: number;
|
|
67
|
+
}
|
|
68
|
+
export declare class AdaptiveEmbedder {
|
|
69
|
+
private config;
|
|
70
|
+
private lora;
|
|
71
|
+
private prototypes;
|
|
72
|
+
private episodic;
|
|
73
|
+
private onnxReady;
|
|
74
|
+
private dimension;
|
|
75
|
+
private adaptationCount;
|
|
76
|
+
private ewcCount;
|
|
77
|
+
private contrastiveCount;
|
|
78
|
+
private coEditBuffer;
|
|
79
|
+
constructor(config?: AdaptiveConfig);
|
|
80
|
+
/**
|
|
81
|
+
* Initialize ONNX backend
|
|
82
|
+
*/
|
|
83
|
+
init(): Promise<void>;
|
|
84
|
+
/**
|
|
85
|
+
* Generate adaptive embedding
|
|
86
|
+
* Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation
|
|
87
|
+
*/
|
|
88
|
+
embed(text: string, options?: {
|
|
89
|
+
domain?: string;
|
|
90
|
+
useEpisodic?: boolean;
|
|
91
|
+
storeInMemory?: boolean;
|
|
92
|
+
}): Promise<number[]>;
|
|
93
|
+
/**
|
|
94
|
+
* Batch embed with adaptation
|
|
95
|
+
*/
|
|
96
|
+
embedBatch(texts: string[], options?: {
|
|
97
|
+
domain?: string;
|
|
98
|
+
}): Promise<number[][]>;
|
|
99
|
+
/**
|
|
100
|
+
* Learn from co-edit pattern (contrastive learning)
|
|
101
|
+
* Files edited together should have similar embeddings
|
|
102
|
+
*/
|
|
103
|
+
learnCoEdit(file1: string, content1: string, file2: string, content2: string): Promise<number>;
|
|
104
|
+
/**
|
|
105
|
+
* Process co-edit batch with contrastive loss
|
|
106
|
+
*/
|
|
107
|
+
private processCoEditBatch;
|
|
108
|
+
/**
|
|
109
|
+
* Learn from trajectory outcome (reinforcement-like)
|
|
110
|
+
*/
|
|
111
|
+
learnFromOutcome(context: string, action: string, success: boolean, quality?: number): Promise<void>;
|
|
112
|
+
/**
|
|
113
|
+
* EWC consolidation - prevent forgetting important adaptations
|
|
114
|
+
*/
|
|
115
|
+
consolidate(): Promise<void>;
|
|
116
|
+
/**
|
|
117
|
+
* Fallback hash embedding
|
|
118
|
+
*/
|
|
119
|
+
private hashEmbed;
|
|
120
|
+
private normalize;
|
|
121
|
+
/**
|
|
122
|
+
* Get statistics
|
|
123
|
+
*/
|
|
124
|
+
getStats(): AdaptiveStats;
|
|
125
|
+
/**
|
|
126
|
+
* Export learned weights
|
|
127
|
+
*/
|
|
128
|
+
export(): {
|
|
129
|
+
lora: LoRAWeights;
|
|
130
|
+
prototypes: DomainPrototype[];
|
|
131
|
+
stats: AdaptiveStats;
|
|
132
|
+
};
|
|
133
|
+
/**
|
|
134
|
+
* Import learned weights
|
|
135
|
+
*/
|
|
136
|
+
import(data: {
|
|
137
|
+
lora?: LoRAWeights;
|
|
138
|
+
prototypes?: DomainPrototype[];
|
|
139
|
+
}): void;
|
|
140
|
+
/**
|
|
141
|
+
* Reset adaptations
|
|
142
|
+
*/
|
|
143
|
+
reset(): void;
|
|
144
|
+
}
|
|
145
|
+
export declare function getAdaptiveEmbedder(config?: AdaptiveConfig): AdaptiveEmbedder;
|
|
146
|
+
export declare function initAdaptiveEmbedder(config?: AdaptiveConfig): Promise<AdaptiveEmbedder>;
|
|
147
|
+
export default AdaptiveEmbedder;
|
|
148
|
+
//# sourceMappingURL=adaptive-embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive-embedder.d.ts","sourceRoot":"","sources":["../../src/core/adaptive-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gDAAgD;IAChD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,uCAAuC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAuZD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAa;IAGrC,OAAO,CAAC,YAAY,CAA+E;gBAEvF,MAAM,GAAE,cAAmB;IAgBvC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAClC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAmCrB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE;QAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAsBvB;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBpG;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACG,gBAAgB,CACpB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,MAAY,GAC
pB,OAAO,CAAC,IAAI,CAAC;IAiBhB;;OAEG;IACG,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAelC;;OAEG;IACH,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,SAAS;IAKjB;;OAEG;IACH,QAAQ,IAAI,aAAa;IAczB;;OAEG;IACH,MAAM,IAAI;QACR,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,eAAe,EAAE,CAAC;QAC9B,KAAK,EAAE,aAAa,CAAC;KACtB;IAQD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,EAAE,WAAW,CAAC;QAAC,UAAU,CAAC,EAAE,eAAe,EAAE,CAAA;KAAE,GAAG,IAAI;IAS1E;;OAEG;IACH,KAAK,IAAI,IAAI;CASd;AAQD,wBAAgB,mBAAmB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAK7E;AAED,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAI7F;AAED,eAAe,gBAAgB,CAAC"}
|
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* AdaptiveEmbedder - Micro-LoRA Style Optimization for ONNX Embeddings
|
|
4
|
+
*
|
|
5
|
+
* Applies continual learning techniques to frozen ONNX embeddings:
|
|
6
|
+
*
|
|
7
|
+
* 1. MICRO-LORA ADAPTERS
|
|
8
|
+
* - Low-rank projection layers (rank 2-8) on top of frozen embeddings
|
|
9
|
+
* - Domain-specific fine-tuning with minimal parameters
|
|
10
|
+
* - ~0.1% of base model parameters
|
|
11
|
+
*
|
|
12
|
+
* 2. CONTRASTIVE LEARNING
|
|
13
|
+
* - Files edited together → embeddings closer
|
|
14
|
+
* - Semantic clustering from trajectories
|
|
15
|
+
* - Online learning from user behavior
|
|
16
|
+
*
|
|
17
|
+
* 3. EWC++ (Elastic Weight Consolidation)
|
|
18
|
+
* - Prevents catastrophic forgetting
|
|
19
|
+
* - Consolidates important adaptations
|
|
20
|
+
* - Fisher information regularization
|
|
21
|
+
*
|
|
22
|
+
* 4. MEMORY-AUGMENTED RETRIEVAL
|
|
23
|
+
* - Episodic memory for context-aware embeddings
|
|
24
|
+
* - Attention over past similar embeddings
|
|
25
|
+
* - Domain prototype learning
|
|
26
|
+
*
|
|
27
|
+
* Architecture:
|
|
28
|
+
* ONNX(text) → [frozen 384d] → LoRA_A → LoRA_B → [adapted 384d]
|
|
29
|
+
* (384×r) (r×384)
|
|
30
|
+
*/
|
|
31
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
32
|
+
exports.AdaptiveEmbedder = void 0;
|
|
33
|
+
exports.getAdaptiveEmbedder = getAdaptiveEmbedder;
|
|
34
|
+
exports.initAdaptiveEmbedder = initAdaptiveEmbedder;
|
|
35
|
+
const onnx_embedder_1 = require("./onnx-embedder");
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// Micro-LoRA Layer
|
|
38
|
+
// ============================================================================
|
|
39
|
+
/**
|
|
40
|
+
* Low-rank adaptation layer for embeddings
|
|
41
|
+
* Implements: output = input + scale * (input @ A @ B)
|
|
42
|
+
*/
|
|
43
|
+
/**
 * Low-rank adaptation layer for frozen embeddings.
 *
 * Forward pass: output = input + scale * (input @ A @ B), where A is
 * (dim x rank) and B is (rank x dim). B starts near zero, so the adapter
 * is initially a near-identity residual.
 */
class MicroLoRA {
    constructor(dim, rank, scale = 0.1) {
        // EWC state: diagonal Fisher information (importance weights) and the
        // anchor ("saved") weights from the last consolidation. Null until
        // consolidate() has been called at least once.
        this.fisherA = null;
        this.fisherB = null;
        this.savedA = null;
        this.savedB = null;
        this.dim = dim;
        this.rank = rank;
        this.scale = scale;
        // Xavier-like initialization; B is scaled way down so the adapter
        // starts as an (almost) identity transform.
        const stdA = Math.sqrt(2 / (dim + rank));
        const stdB = Math.sqrt(2 / (rank + dim)) * 0.01; // B starts near zero
        this.A = this.initMatrix(dim, rank, stdA);
        this.B = this.initMatrix(rank, dim, stdB);
        this.gradA = this.zeroMatrix(dim, rank);
        this.gradB = this.zeroMatrix(rank, dim);
    }
    // Uniform random (rows x cols) matrix in [-std, std].
    initMatrix(rows, cols, std) {
        return Array(rows).fill(0).map(() => Array(cols).fill(0).map(() => (Math.random() - 0.5) * 2 * std));
    }
    zeroMatrix(rows, cols) {
        return Array(rows).fill(0).map(() => Array(cols).fill(0));
    }
    /**
     * Forward pass: input + scale * (input @ A @ B).
     * @param {number[]} input - dim-length embedding.
     * @returns {number[]} adapted dim-length embedding.
     */
    forward(input) {
        // hidden = input @ A  (dim → rank)
        const hidden = new Array(this.rank).fill(0);
        for (let r = 0; r < this.rank; r++) {
            for (let d = 0; d < this.dim; d++) {
                hidden[r] += input[d] * this.A[d][r];
            }
        }
        // output = input + scale * (hidden @ B)  (rank → dim, residual add)
        const output = [...input];
        for (let d = 0; d < this.dim; d++) {
            let delta = 0;
            for (let r = 0; r < this.rank; r++) {
                delta += hidden[r] * this.B[r][d];
            }
            output[d] += this.scale * delta;
        }
        return output;
    }
    /**
     * One online contrastive update: pull the adapted anchor toward the
     * adapted positive (negatives only contribute to the reported InfoNCE
     * loss, matching the original simplified scheme).
     *
     * FIX: the previous gradient indexed dim-length vectors (anchorOut,
     * positiveOut) with the rank index and indexed `anchor` with a rank
     * index for the B update — dimensionally wrong, so only the first
     * `rank` components ever drove learning. Replaced with the chain rule
     * for out = x + scale·(xA)B:
     *   dL/dB[r][d] ∝ hidden[r] * diff[d]
     *   dL/dA[d][r] ∝ anchor[d] * (diff @ B^T)[r]
     * where diff = positiveOut - anchorOut.
     *
     * @returns {number} the InfoNCE loss (0 when no positive is given).
     */
    backward(anchor, positive, negatives, lr, ewcLambda = 0) {
        if (!positive && negatives.length === 0)
            return 0;
        // Adapted embeddings under the current weights.
        const anchorOut = this.forward(anchor);
        const positiveOut = positive ? this.forward(positive) : null;
        const negativeOuts = negatives.map(n => this.forward(n));
        // Contrastive loss with temperature scaling
        const temp = 0.07;
        let loss = 0;
        if (positiveOut) {
            const posSim = this.cosineSimilarity(anchorOut, positiveOut) / temp;
            const negSims = negativeOuts.map(n => this.cosineSimilarity(anchorOut, n) / temp);
            // InfoNCE loss (max-shifted for numerical stability)
            const maxSim = Math.max(posSim, ...negSims);
            const expPos = Math.exp(posSim - maxSim);
            const expNegs = negSims.reduce((sum, s) => sum + Math.exp(s - maxSim), 0);
            loss = -Math.log(expPos / (expPos + expNegs) + 1e-8);
            const gradScale = lr * this.scale;
            // Error signal (dim-length): direction from anchor toward positive.
            const diff = anchorOut.map((v, d) => positiveOut[d] - v);
            // hidden = anchor @ A (rank-length activation of the down-projection).
            const hidden = new Array(this.rank).fill(0);
            for (let r = 0; r < this.rank; r++) {
                for (let d = 0; d < this.dim; d++) {
                    hidden[r] += anchor[d] * this.A[d][r];
                }
            }
            // diffB = diff @ B^T (rank-length): error back-projected through B.
            const diffB = new Array(this.rank).fill(0);
            for (let r = 0; r < this.rank; r++) {
                for (let d = 0; d < this.dim; d++) {
                    diffB[r] += diff[d] * this.B[r][d];
                }
            }
            // Update A (pull closer), with optional EWC pull-back to saved weights.
            for (let d = 0; d < this.dim; d++) {
                for (let r = 0; r < this.rank; r++) {
                    this.A[d][r] += anchor[d] * diffB[r] * gradScale;
                    if (ewcLambda > 0 && this.fisherA && this.savedA) {
                        this.A[d][r] -= ewcLambda * this.fisherA[d][r] * (this.A[d][r] - this.savedA[d][r]);
                    }
                }
            }
            // Update B (damped by 0.1, as in the original scheme).
            for (let r = 0; r < this.rank; r++) {
                for (let d = 0; d < this.dim; d++) {
                    this.B[r][d] += hidden[r] * diff[d] * gradScale * 0.1;
                    if (ewcLambda > 0 && this.fisherB && this.savedB) {
                        this.B[r][d] -= ewcLambda * this.fisherB[r][d] * (this.B[r][d] - this.savedB[r][d]);
                    }
                }
            }
        }
        return loss;
    }
    /**
     * EWC consolidation: snapshot current weights and estimate diagonal
     * Fisher information from a sample of embeddings.
     *
     * FIX: the previous loop only ever accumulated into fisherA, leaving
     * fisherB all-zero so EWC never protected B. Both layers now use the
     * squared input activation of that layer as the diagonal Fisher estimate
     * (emb² for A, (emb @ A)² for B).
     */
    consolidate(embeddings) {
        // Save current weights as the EWC anchor.
        this.savedA = this.A.map(row => [...row]);
        this.savedB = this.B.map(row => [...row]);
        this.fisherA = this.zeroMatrix(this.dim, this.rank);
        this.fisherB = this.zeroMatrix(this.rank, this.dim);
        for (const emb of embeddings) {
            // A's input is the raw embedding; B's input is the hidden activation.
            const hidden = new Array(this.rank).fill(0);
            for (let r = 0; r < this.rank; r++) {
                for (let d = 0; d < this.dim; d++) {
                    hidden[r] += emb[d] * this.A[d][r];
                }
            }
            for (let d = 0; d < this.dim; d++) {
                for (let r = 0; r < this.rank; r++) {
                    this.fisherA[d][r] += emb[d] * emb[d] / embeddings.length;
                }
            }
            for (let r = 0; r < this.rank; r++) {
                for (let d = 0; d < this.dim; d++) {
                    this.fisherB[r][d] += hidden[r] * hidden[r] / embeddings.length;
                }
            }
        }
    }
    // Standard cosine similarity with a small epsilon guard against zero norms.
    cosineSimilarity(a, b) {
        let dot = 0, normA = 0, normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB) + 1e-8);
    }
    // Total trainable parameter count (A + B).
    getParams() {
        return this.dim * this.rank + this.rank * this.dim;
    }
    // Deep-copy the adapter weights for serialization.
    export() {
        return {
            A: this.A.map(row => [...row]),
            B: this.B.map(row => [...row]),
        };
    }
    // Replace adapter weights with deep copies of the given ones.
    import(weights) {
        this.A = weights.A.map(row => [...row]);
        this.B = weights.B.map(row => [...row]);
    }
}
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// Domain Prototype Learning
|
|
184
|
+
// ============================================================================
|
|
185
|
+
/**
 * Online domain-prototype store.
 *
 * Maintains one running centroid per domain (Welford-style online mean,
 * with a scalar variance accumulator summed over dimensions) and can
 * softly pull a query embedding toward its nearest prototype.
 */
class PrototypeMemory {
    constructor(maxPrototypes = 50) {
        this.prototypes = new Map();
        this.maxPrototypes = maxPrototypes;
    }
    /**
     * Fold a new embedding into the domain's running statistics.
     * Creates the prototype if it doesn't exist, evicting the
     * least-populated one when at capacity.
     */
    update(domain, embedding) {
        const proto = this.prototypes.get(domain);
        if (!proto) {
            // At capacity: drop the prototype with the fewest observations.
            if (this.prototypes.size >= this.maxPrototypes) {
                let victim = '';
                let fewest = Infinity;
                for (const [name, p] of this.prototypes) {
                    if (p.count < fewest) {
                        fewest = p.count;
                        victim = name;
                    }
                }
                this.prototypes.delete(victim);
            }
            this.prototypes.set(domain, {
                domain,
                centroid: [...embedding],
                count: 1,
                variance: 0,
            });
            return;
        }
        // Online mean: new_mean = old_mean + (x - old_mean) / n,
        // variance accumulated via Welford's algorithm.
        const n = proto.count + 1;
        for (let i = 0; i < embedding.length; i++) {
            const delta = embedding[i] - proto.centroid[i];
            proto.centroid[i] += delta / n;
            proto.variance += delta * (embedding[i] - proto.centroid[i]);
        }
        proto.count = n;
    }
    /**
     * Soft-assign the embedding to its nearest prototype (cosine similarity)
     * and blend it toward that centroid. Returns the input untouched when no
     * prototype matches above the 0.5 similarity threshold.
     */
    adjust(embedding) {
        if (this.prototypes.size === 0) {
            return { adjusted: embedding, domain: null, confidence: 0 };
        }
        let best = null;
        let bestSim = -Infinity;
        for (const proto of this.prototypes.values()) {
            const sim = this.cosineSimilarity(embedding, proto.centroid);
            if (sim > bestSim) {
                bestSim = sim;
                best = proto;
            }
        }
        if (!best || bestSim < 0.5) {
            return { adjusted: embedding, domain: null, confidence: 0 };
        }
        // Blend strength grows with similarity (soft assignment).
        const alpha = 0.1 * bestSim;
        return {
            adjusted: embedding.map((v, i) => v * (1 - alpha) + best.centroid[i] * alpha),
            domain: best.domain,
            confidence: bestSim,
        };
    }
    // Cosine similarity with an epsilon guard against zero-norm vectors.
    cosineSimilarity(a, b) {
        let dot = 0;
        let na = 0;
        let nb = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            na += a[i] * a[i];
            nb += b[i] * b[i];
        }
        return dot / (Math.sqrt(na) * Math.sqrt(nb) + 1e-8);
    }
    getPrototypes() {
        return [...this.prototypes.values()];
    }
    export() {
        return this.getPrototypes();
    }
    // Replace all prototypes with the given list (keyed by domain).
    import(prototypes) {
        this.prototypes.clear();
        for (const p of prototypes) {
            this.prototypes.set(p.domain, p);
        }
    }
}
|
|
278
|
+
class EpisodicMemory {
  /**
   * Bounded store of past embeddings used for retrieval-based augmentation.
   * @param {number} [capacity=1000] - Maximum number of entries retained.
   */
  constructor(capacity = 1000) {
    this.entries = [];
    this.capacity = capacity;
  }
  /**
   * Record an embedding together with a context snippet. When the store is
   * full, evicts the entry with the lowest useCount (least-frequently
   * retrieved), not the oldest.
   */
  add(embedding, context) {
    if (this.entries.length >= this.capacity) {
      // Ascending sort by useCount puts the least-used entry first.
      this.entries.sort((x, y) => x.useCount - y.useCount);
      this.entries.shift();
    }
    this.entries.push({
      embedding,
      context,
      timestamp: Date.now(),
      useCount: 0,
    });
  }
  /**
   * Retrieve the k most similar past embeddings and bump their use counts.
   */
  retrieve(query, k = 5) {
    const ranked = this.entries.map((entry) => ({
      entry,
      similarity: this.cosineSimilarity(query, entry.embedding),
    }));
    ranked.sort((x, y) => y.similarity - x.similarity);
    const top = ranked.slice(0, k);
    // Retrieval counts as usage; protects these entries from eviction.
    for (const hit of top) {
      hit.entry.useCount += 1;
    }
    return top.map((hit) => hit.entry);
  }
  /**
   * Blend the query embedding with its k nearest memories using
   * softmax-style weights (temperature 0.1); the query itself carries
   * weight 1 in the average.
   */
  augment(embedding, k = 3) {
    const neighbors = this.retrieve(embedding, k);
    if (neighbors.length === 0) {
      return embedding;
    }
    const weights = neighbors.map(
      (n) => Math.exp(this.cosineSimilarity(embedding, n.embedding) / 0.1)
    );
    const total = weights.reduce((acc, w) => acc + w, 0) + 1; // +1 for the query
    return embedding.map((v, i) => {
      let acc = v; // query contribution at weight 1
      for (let j = 0; j < neighbors.length; j += 1) {
        acc += weights[j] * neighbors[j].embedding[i];
      }
      return acc / total;
    });
  }
  cosineSimilarity(a, b) {
    // Single pass accumulating dot product and both squared magnitudes.
    let dot = 0;
    let sqA = 0;
    let sqB = 0;
    for (let i = 0; i < a.length; i += 1) {
      dot += a[i] * b[i];
      sqA += a[i] * a[i];
      sqB += b[i] * b[i];
    }
    // Epsilon keeps the division finite for all-zero vectors.
    return dot / (Math.sqrt(sqA) * Math.sqrt(sqB) + 1e-8);
  }
  size() {
    return this.entries.length;
  }
  clear() {
    this.entries = [];
  }
}
|
|
346
|
+
// ============================================================================
|
|
347
|
+
// Adaptive Embedder (Main Class)
|
|
348
|
+
// ============================================================================
|
|
349
|
+
class AdaptiveEmbedder {
    /**
     * Self-adapting embedding pipeline:
     * ONNX base model (or hash fallback) → MicroLoRA adapter →
     * prototype adjustment → episodic-memory augmentation → L2 normalize.
     * @param {object} [config] - Optional hyperparameter overrides
     *   (loraRank, learningRate, ewcLambda, numPrototypes,
     *   contrastiveLearning, contrastiveTemp, memoryCapacity).
     */
    constructor(config = {}) {
        this.onnxReady = false;
        this.dimension = 384; // all-MiniLM-L6-v2 output size
        // Adaptation statistics
        this.adaptationCount = 0;
        this.ewcCount = 0;
        this.contrastiveCount = 0;
        // Pending co-edit pairs, flushed in batches for contrastive learning
        this.coEditBuffer = [];
        this.config = {
            loraRank: config.loraRank ?? 4,
            learningRate: config.learningRate ?? 0.01,
            ewcLambda: config.ewcLambda ?? 0.1,
            numPrototypes: config.numPrototypes ?? 50,
            contrastiveLearning: config.contrastiveLearning ?? true,
            contrastiveTemp: config.contrastiveTemp ?? 0.07,
            memoryCapacity: config.memoryCapacity ?? 1000,
        };
        this.lora = new MicroLoRA(this.dimension, this.config.loraRank);
        this.prototypes = new PrototypeMemory(this.config.numPrototypes);
        this.episodic = new EpisodicMemory(this.config.memoryCapacity);
    }
    /**
     * Initialize the ONNX backend if available. When unavailable the
     * embedder silently keeps using the hash-embedding fallback.
     */
    async init() {
        if ((0, onnx_embedder_1.isOnnxAvailable)()) {
            await (0, onnx_embedder_1.initOnnxEmbedder)();
            this.onnxReady = true;
        }
    }
    /**
     * Generate an adaptive embedding for `text`.
     * Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation.
     * @param {string} text
     * @param {{domain?: string, useEpisodic?: boolean, storeInMemory?: boolean}} [options]
     * @returns {Promise<number[]>} unit-length embedding
     */
    async embed(text, options) {
        // Step 1: base embedding (ONNX when ready, hash fallback otherwise)
        let baseEmb;
        if (this.onnxReady) {
            const result = await (0, onnx_embedder_1.embed)(text);
            baseEmb = result.embedding;
        }
        else {
            baseEmb = this.hashEmbed(text);
        }
        // Step 2: apply LoRA adaptation
        let adapted = this.lora.forward(baseEmb);
        // Step 3: update the domain prototype (if a domain is given), then
        // pull the embedding toward the best-matching prototype
        if (options?.domain) {
            this.prototypes.update(options.domain, adapted);
        }
        const { adjusted } = this.prototypes.adjust(adapted);
        adapted = adjusted;
        // Step 4: episodic memory augmentation (opt-out via useEpisodic:false)
        if (options?.useEpisodic !== false) {
            adapted = this.episodic.augment(adapted);
        }
        // Step 5: remember for future augmentation (opt-out via storeInMemory:false)
        if (options?.storeInMemory !== false) {
            this.episodic.add(adapted, text.slice(0, 100));
        }
        return this.normalize(adapted);
    }
    /**
     * Batch embed with adaptation. Uses the ONNX batch path when available;
     * note that the batch path skips episodic augmentation/storage.
     */
    async embedBatch(texts, options) {
        const results = [];
        if (this.onnxReady) {
            const baseResults = await (0, onnx_embedder_1.embedBatch)(texts);
            for (let i = 0; i < baseResults.length; i++) {
                let adapted = this.lora.forward(baseResults[i].embedding);
                if (options?.domain) {
                    this.prototypes.update(options.domain, adapted);
                }
                const { adjusted } = this.prototypes.adjust(adapted);
                results.push(this.normalize(adjusted));
            }
        }
        else {
            // Sequential fallback goes through the full embed() pipeline.
            for (const text of texts) {
                results.push(await this.embed(text, options));
            }
        }
        return results;
    }
    /**
     * Learn from a co-edit pattern (contrastive learning): files edited
     * together should have similar embeddings. Pairs are buffered and
     * trained in batches of 16.
     * @returns {Promise<number>} average batch loss, or 0 if no batch ran
     */
    async learnCoEdit(file1, content1, file2, content2) {
        if (!this.config.contrastiveLearning)
            return 0;
        // Embed only a prefix of each file; skip episodic storage for training pairs.
        const emb1 = await this.embed(content1.slice(0, 512), { storeInMemory: false });
        const emb2 = await this.embed(content2.slice(0, 512), { storeInMemory: false });
        this.coEditBuffer.push({ file1, emb1, file2, emb2 });
        // Flush when the buffer reaches batch size.
        if (this.coEditBuffer.length >= 16) {
            return this.processCoEditBatch();
        }
        return 0;
    }
    /**
     * Process the buffered co-edit pairs with a contrastive loss; other
     * pairs in the batch serve as negatives.
     * @returns {number} average loss over the batch
     */
    processCoEditBatch() {
        if (this.coEditBuffer.length < 2)
            return 0;
        // BUG FIX: capture the batch size BEFORE clearing the buffer.
        // Previously the average divided by this.coEditBuffer.length after the
        // buffer was reset, so this method always returned NaN/Infinity.
        const batchSize = this.coEditBuffer.length;
        let totalLoss = 0;
        for (const { emb1, emb2 } of this.coEditBuffer) {
            // Use up to 4 other pairs' first embeddings as negatives.
            const negatives = this.coEditBuffer
                .filter(p => p.emb1 !== emb1)
                .slice(0, 4)
                .map(p => p.emb1);
            const loss = this.lora.backward(emb1, emb2, negatives, this.config.learningRate, this.config.ewcLambda);
            totalLoss += loss;
            this.contrastiveCount++;
        }
        this.coEditBuffer = [];
        this.adaptationCount++;
        return totalLoss / batchSize;
    }
    /**
     * Learn from a trajectory outcome (reinforcement-like). Only successful,
     * high-quality (> 0.7) outcomes trigger an update that pulls the
     * context and action embeddings closer.
     */
    async learnFromOutcome(context, action, success, quality = 0.5) {
        const contextEmb = await this.embed(context, { storeInMemory: false });
        const actionEmb = await this.embed(action, { storeInMemory: false });
        if (success && quality > 0.7) {
            // Scale the learning rate by outcome quality.
            this.lora.backward(contextEmb, actionEmb, [], this.config.learningRate * quality, this.config.ewcLambda);
            this.adaptationCount++;
        }
    }
    /**
     * EWC consolidation - estimate parameter importance from recent
     * episodic memories to prevent forgetting important adaptations.
     * Requires more than 10 recent embeddings to run.
     */
    async consolidate() {
        const embeddings = [];
        const entries = this.episodic['entries'] || [];
        // Use up to the 100 most recent memories for Fisher estimation.
        for (const entry of entries.slice(-100)) {
            embeddings.push(entry.embedding);
        }
        if (embeddings.length > 10) {
            this.lora.consolidate(embeddings);
            this.ewcCount++;
        }
    }
    /**
     * Fallback hash embedding used when ONNX is unavailable. Deterministic:
     * each character contributes to two hashed dimensions, weighted down by
     * token position.
     */
    hashEmbed(text) {
        const embedding = new Array(this.dimension).fill(0);
        const tokens = text.toLowerCase().split(/\s+/);
        for (let t = 0; t < tokens.length; t++) {
            const token = tokens[t];
            const posWeight = 1 / (1 + t * 0.1); // earlier tokens weigh more
            for (let i = 0; i < token.length; i++) {
                const code = token.charCodeAt(i);
                const h1 = (code * 31 + i * 17 + t * 7) % this.dimension;
                const h2 = (code * 37 + i * 23 + t * 11) % this.dimension;
                embedding[h1] += posWeight;
                embedding[h2] += posWeight * 0.5;
            }
        }
        return this.normalize(embedding);
    }
    /** L2-normalize a vector; zero vectors are returned unchanged. */
    normalize(v) {
        const norm = Math.sqrt(v.reduce((a, b) => a + b * b, 0));
        return norm > 0 ? v.map(x => x / norm) : v;
    }
    /**
     * Get statistics about the adapter state and learning history.
     */
    getStats() {
        return {
            baseModel: 'all-MiniLM-L6-v2',
            dimension: this.dimension,
            loraRank: this.config.loraRank,
            loraParams: this.lora.getParams(),
            adaptations: this.adaptationCount,
            prototypes: this.prototypes.getPrototypes().length,
            memorySize: this.episodic.size(),
            ewcConsolidations: this.ewcCount,
            contrastiveUpdates: this.contrastiveCount,
        };
    }
    /**
     * Export learned weights (LoRA + prototypes) plus a stats snapshot.
     */
    export() {
        return {
            lora: this.lora.export(),
            prototypes: this.prototypes.export(),
            stats: this.getStats(),
        };
    }
    /**
     * Import previously exported weights. Missing sections are skipped.
     */
    import(data) {
        if (data.lora) {
            this.lora.import(data.lora);
        }
        if (data.prototypes) {
            this.prototypes.import(data.prototypes);
        }
    }
    /**
     * Reset all adaptations back to a freshly-constructed state
     * (keeps the configuration).
     */
    reset() {
        this.lora = new MicroLoRA(this.dimension, this.config.loraRank);
        this.prototypes = new PrototypeMemory(this.config.numPrototypes);
        this.episodic.clear();
        this.adaptationCount = 0;
        this.ewcCount = 0;
        this.contrastiveCount = 0;
        this.coEditBuffer = [];
    }
}
|
|
578
|
+
exports.AdaptiveEmbedder = AdaptiveEmbedder;
|
|
579
|
+
// ============================================================================
|
|
580
|
+
// Factory & Singleton
|
|
581
|
+
// ============================================================================
|
|
582
|
+
// Module-level singleton so all callers share one adapted model.
let instance = null;
/**
 * Return the shared AdaptiveEmbedder, constructing it lazily.
 * Note: `config` is only honored on the very first call.
 */
function getAdaptiveEmbedder(config) {
    if (instance === null) {
        instance = new AdaptiveEmbedder(config);
    }
    return instance;
}
|
|
589
|
+
/**
 * Convenience helper: obtain the singleton and initialize its ONNX backend.
 */
async function initAdaptiveEmbedder(config) {
    const shared = getAdaptiveEmbedder(config);
    await shared.init();
    return shared;
}
|
|
594
|
+
exports.default = AdaptiveEmbedder;
|
package/dist/core/index.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export * from './coverage-router';
|
|
|
21
21
|
export * from './graph-algorithms';
|
|
22
22
|
export * from './tensor-compress';
|
|
23
23
|
export * from './learning-engine';
|
|
24
|
+
export * from './adaptive-embedder';
|
|
24
25
|
export * from '../analysis';
|
|
25
26
|
export { default as gnnWrapper } from './gnn-wrapper';
|
|
26
27
|
export { default as attentionFallbacks } from './attention-fallbacks';
|
|
@@ -37,4 +38,5 @@ export { default as CodeParser } from './ast-parser';
|
|
|
37
38
|
export { CodeParser as ASTParser } from './ast-parser';
|
|
38
39
|
export { default as TensorCompress } from './tensor-compress';
|
|
39
40
|
export { default as LearningEngine } from './learning-engine';
|
|
41
|
+
export { default as AdaptiveEmbedder } from './adaptive-embedder';
|
|
40
42
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/core/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/core/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,uBAAuB,CAAC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,yBAAyB,CAAC;AACxC,cAAc,oBAAoB,CAAC;AACnC,cAAc,kBAAkB,CAAC;AACjC,cAAc,iBAAiB,CAAC;AAChC,cAAc,mBAAmB,CAAC;AAClC,cAAc,cAAc,CAAC;AAC7B,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oBAAoB,CAAC;AACnC,cAAc,mBAAmB,CAAC;AAClC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AAGpC,cAAc,aAAa,CAAC;AAG5B,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,eAAe,CAAC;AACtD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,OAAO,IAAI,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AACtE,OAAO,EAAE,OAAO,IAAI,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,OAAO,IAAI,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC1E,OAAO,EAAE,OAAO,IAAI,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EAAE,OAAO,IAAI,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,OAAO,IAAI,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,cAAc,CAAC;AAGrD,OAAO,EAAE,UAAU,IAAI,SAAS,EAAE,MAAM,cAAc,CAAC;AAGvD,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC9D,OAAO,EAAE,OAAO,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC"}
|
package/dist/core/index.js
CHANGED
|
@@ -23,7 +23,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
23
23
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
24
24
|
};
|
|
25
25
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
|
|
26
|
+
exports.AdaptiveEmbedder = exports.LearningEngine = exports.TensorCompress = exports.ASTParser = exports.CodeParser = exports.RuvectorCluster = exports.CodeGraph = exports.SemanticRouter = exports.ExtendedWorkerPool = exports.ParallelIntelligence = exports.OnnxEmbedder = exports.IntelligenceEngine = exports.Sona = exports.agentdbFast = exports.attentionFallbacks = exports.gnnWrapper = void 0;
|
|
27
27
|
__exportStar(require("./gnn-wrapper"), exports);
|
|
28
28
|
__exportStar(require("./attention-fallbacks"), exports);
|
|
29
29
|
__exportStar(require("./agentdb-fast"), exports);
|
|
@@ -41,6 +41,7 @@ __exportStar(require("./coverage-router"), exports);
|
|
|
41
41
|
__exportStar(require("./graph-algorithms"), exports);
|
|
42
42
|
__exportStar(require("./tensor-compress"), exports);
|
|
43
43
|
__exportStar(require("./learning-engine"), exports);
|
|
44
|
+
__exportStar(require("./adaptive-embedder"), exports);
|
|
44
45
|
// Analysis module (consolidated security, complexity, patterns)
|
|
45
46
|
__exportStar(require("../analysis"), exports);
|
|
46
47
|
// Re-export default objects for convenience
|
|
@@ -76,3 +77,5 @@ var tensor_compress_1 = require("./tensor-compress");
|
|
|
76
77
|
Object.defineProperty(exports, "TensorCompress", { enumerable: true, get: function () { return __importDefault(tensor_compress_1).default; } });
|
|
77
78
|
var learning_engine_1 = require("./learning-engine");
|
|
78
79
|
Object.defineProperty(exports, "LearningEngine", { enumerable: true, get: function () { return __importDefault(learning_engine_1).default; } });
|
|
80
|
+
var adaptive_embedder_1 = require("./adaptive-embedder");
|
|
81
|
+
Object.defineProperty(exports, "AdaptiveEmbedder", { enumerable: true, get: function () { return __importDefault(adaptive_embedder_1).default; } });
|