@sparkleideas/embeddings 3.0.0-alpha.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -0
- package/package.json +66 -0
- package/src/__tests__/embedding-service.test.ts +126 -0
- package/src/chunking.ts +351 -0
- package/src/embedding-service.ts +1136 -0
- package/src/hyperbolic.ts +458 -0
- package/src/index.ts +116 -0
- package/src/neural-integration.ts +295 -0
- package/src/normalization.ts +267 -0
- package/src/persistent-cache.ts +410 -0
- package/src/types.ts +282 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural Substrate Integration
|
|
3
|
+
*
|
|
4
|
+
* Integrates @sparkleideas/agentic-flow's neural embedding features:
|
|
5
|
+
* - Semantic drift detection
|
|
6
|
+
* - Memory physics (hippocampal dynamics)
|
|
7
|
+
* - Embedding state machine
|
|
8
|
+
* - Swarm coordination
|
|
9
|
+
* - Coherence monitoring
|
|
10
|
+
*
|
|
11
|
+
* These features treat embeddings as a synthetic nervous system.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Types from @sparkleideas/agentic-flow/embeddings
|
|
15
|
+
/**
 * Result of semantic-drift detection against a baseline.
 * Mirrors the shape returned by @sparkleideas/agentic-flow/embeddings'
 * drift detector (declared locally so this module compiles without the
 * optional dependency).
 */
export interface DriftResult {
  // Drift magnitude from the baseline embedding (metric not shown here — confirm upstream).
  distance: number;
  velocity: number;
  acceleration: number;
  trend: 'stable' | 'drifting' | 'accelerating' | 'recovering';
  // Flags the substrate sets when drift warrants escalation / extra reasoning.
  shouldEscalate: boolean;
  shouldTriggerReasoning: boolean;
}

/** A stored memory item tracked by the substrate's memory subsystem. */
export interface MemoryEntry {
  id: string;
  embedding: Float32Array;
  content: string;
  // Retention strength; consolidation merges/forgets based on it (see consolidateMemories()).
  strength: number;
  timestamp: number;
  accessCount: number;
  // IDs of associated memories — presumably MemoryEntry ids; confirm upstream.
  associations: string[];
}

/** Embedding-space state of a registered agent. */
export interface AgentState {
  id: string;
  position: Float32Array;
  velocity: Float32Array;
  attention: Float32Array;
  energy: number;
  // Timestamp of the last state update (units not shown here — likely ms epoch; confirm).
  lastUpdate: number;
}

/** Result of a coherence check on model output. */
export interface CoherenceResult {
  isCoherent: boolean;
  anomalyScore: number;
  stabilityScore: number;
  // Direction of drift in embedding space, or null when none detected.
  driftDirection: Float32Array | null;
  warnings: string[];
}

/** Aggregate health snapshot reported by the substrate (see health()). */
export interface SubstrateHealth {
  memoryCount: number;
  activeAgents: number;
  avgDrift: number;
  avgCoherence: number;
  lastConsolidation: number;
  uptime: number;
}

/**
 * Configuration forwarded to getNeuralSubstrate(). All fields optional;
 * defaults are applied by the upstream package.
 */
export interface NeuralSubstrateConfig {
  dimension?: number;
  driftThreshold?: number;
  decayRate?: number;
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Lazy-loaded Neural Substrate wrapper
|
|
68
|
+
*
|
|
69
|
+
* Wraps @sparkleideas/agentic-flow's NeuralSubstrate with graceful fallback
|
|
70
|
+
*/
|
|
71
|
+
export class NeuralEmbeddingService {
|
|
72
|
+
private substrate: any = null;
|
|
73
|
+
private initialized = false;
|
|
74
|
+
private available = false;
|
|
75
|
+
|
|
76
|
+
constructor(private config: NeuralSubstrateConfig = {}) {}
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Initialize neural substrate
|
|
80
|
+
*/
|
|
81
|
+
async init(): Promise<boolean> {
|
|
82
|
+
if (this.initialized) return this.available;
|
|
83
|
+
|
|
84
|
+
try {
|
|
85
|
+
const { getNeuralSubstrate } = await import('@sparkleideas/agentic-flow/embeddings');
|
|
86
|
+
this.substrate = await getNeuralSubstrate(this.config);
|
|
87
|
+
await this.substrate.init();
|
|
88
|
+
this.available = true;
|
|
89
|
+
} catch (error) {
|
|
90
|
+
console.warn('[neural] Neural substrate not available:', error instanceof Error ? error.message : error);
|
|
91
|
+
this.available = false;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
this.initialized = true;
|
|
95
|
+
return this.available;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Check if neural features are available
|
|
100
|
+
*/
|
|
101
|
+
isAvailable(): boolean {
|
|
102
|
+
return this.available;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Detect semantic drift from baseline
|
|
107
|
+
*/
|
|
108
|
+
async detectDrift(input: string): Promise<DriftResult | null> {
|
|
109
|
+
if (!this.available || !this.substrate) return null;
|
|
110
|
+
return this.substrate.drift.detect(input);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Set baseline for drift detection
|
|
115
|
+
*/
|
|
116
|
+
async setDriftBaseline(context: string): Promise<void> {
|
|
117
|
+
if (!this.available || !this.substrate) return;
|
|
118
|
+
await this.substrate.drift.setBaseline(context);
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Store memory with interference detection
|
|
123
|
+
*/
|
|
124
|
+
async storeMemory(id: string, content: string): Promise<{ stored: boolean; interference: string[] } | null> {
|
|
125
|
+
if (!this.available || !this.substrate) return null;
|
|
126
|
+
return this.substrate.memory.store(id, content);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Recall memories by similarity
|
|
131
|
+
*/
|
|
132
|
+
async recallMemories(query: string, topK = 5): Promise<Array<MemoryEntry & { relevance: number }> | null> {
|
|
133
|
+
if (!this.available || !this.substrate) return null;
|
|
134
|
+
return this.substrate.memory.recall(query, topK);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Consolidate memories (merge similar, forget weak)
|
|
139
|
+
*/
|
|
140
|
+
consolidateMemories(): { merged: number; forgotten: number; remaining: number } | null {
|
|
141
|
+
if (!this.available || !this.substrate) return null;
|
|
142
|
+
return this.substrate.memory.consolidate();
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Register agent for state tracking
|
|
147
|
+
*/
|
|
148
|
+
async registerAgent(id: string, role: string): Promise<AgentState | null> {
|
|
149
|
+
if (!this.available || !this.substrate) return null;
|
|
150
|
+
return this.substrate.states.registerAgent(id, role);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Update agent state based on observation
|
|
155
|
+
*/
|
|
156
|
+
async updateAgentState(agentId: string, observation: string): Promise<{
|
|
157
|
+
newState: AgentState;
|
|
158
|
+
nearestRegion: string;
|
|
159
|
+
regionProximity: number;
|
|
160
|
+
} | null> {
|
|
161
|
+
if (!this.available || !this.substrate) return null;
|
|
162
|
+
return this.substrate.states.updateState(agentId, observation);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Get agent state
|
|
167
|
+
*/
|
|
168
|
+
getAgentState(agentId: string): AgentState | null {
|
|
169
|
+
if (!this.available || !this.substrate) return null;
|
|
170
|
+
return this.substrate.states.getAgent(agentId);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Coordinate swarm for task
|
|
175
|
+
*/
|
|
176
|
+
async coordinateSwarm(task: string): Promise<Array<{
|
|
177
|
+
agentId: string;
|
|
178
|
+
taskAlignment: number;
|
|
179
|
+
bestCollaborator: string | null;
|
|
180
|
+
collaborationScore: number;
|
|
181
|
+
}> | null> {
|
|
182
|
+
if (!this.available || !this.substrate) return null;
|
|
183
|
+
return this.substrate.swarm.coordinate(task);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Add agent to swarm
|
|
188
|
+
*/
|
|
189
|
+
async addSwarmAgent(id: string, role: string): Promise<AgentState | null> {
|
|
190
|
+
if (!this.available || !this.substrate) return null;
|
|
191
|
+
return this.substrate.swarm.addAgent(id, role);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Calibrate coherence monitor
|
|
196
|
+
*/
|
|
197
|
+
async calibrateCoherence(goodOutputs: string[]): Promise<{ calibrated: boolean; sampleCount: number } | null> {
|
|
198
|
+
if (!this.available || !this.substrate) return null;
|
|
199
|
+
return this.substrate.coherence.calibrate(goodOutputs);
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Check output coherence
|
|
204
|
+
*/
|
|
205
|
+
async checkCoherence(output: string): Promise<CoherenceResult | null> {
|
|
206
|
+
if (!this.available || !this.substrate) return null;
|
|
207
|
+
return this.substrate.coherence.check(output);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Process input through full neural substrate
|
|
212
|
+
*/
|
|
213
|
+
async process(input: string, context?: {
|
|
214
|
+
agentId?: string;
|
|
215
|
+
memoryId?: string;
|
|
216
|
+
checkCoherence?: boolean;
|
|
217
|
+
}): Promise<{
|
|
218
|
+
drift: DriftResult;
|
|
219
|
+
state?: { nearestRegion: string; regionProximity: number };
|
|
220
|
+
coherence?: CoherenceResult;
|
|
221
|
+
stored?: boolean;
|
|
222
|
+
} | null> {
|
|
223
|
+
if (!this.available || !this.substrate) return null;
|
|
224
|
+
return this.substrate.process(input, context);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* Get substrate health
|
|
229
|
+
*/
|
|
230
|
+
health(): SubstrateHealth | null {
|
|
231
|
+
if (!this.available || !this.substrate) return null;
|
|
232
|
+
return this.substrate.health();
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Full consolidation pass
|
|
237
|
+
*/
|
|
238
|
+
consolidate(): { memory: { merged: number; forgotten: number; remaining: number } } | null {
|
|
239
|
+
if (!this.available || !this.substrate) return null;
|
|
240
|
+
return this.substrate.consolidate();
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Create neural embedding service
|
|
246
|
+
*/
|
|
247
|
+
export function createNeuralService(config: NeuralSubstrateConfig = {}): NeuralEmbeddingService {
|
|
248
|
+
return new NeuralEmbeddingService(config);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Check if neural features are available
|
|
253
|
+
*/
|
|
254
|
+
export async function isNeuralAvailable(): Promise<boolean> {
|
|
255
|
+
try {
|
|
256
|
+
await import('@sparkleideas/agentic-flow/embeddings');
|
|
257
|
+
return true;
|
|
258
|
+
} catch {
|
|
259
|
+
return false;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* List available ONNX embedding models
|
|
265
|
+
*/
|
|
266
|
+
export async function listEmbeddingModels(): Promise<Array<{
|
|
267
|
+
id: string;
|
|
268
|
+
dimension: number;
|
|
269
|
+
size: string;
|
|
270
|
+
quantized: boolean;
|
|
271
|
+
downloaded: boolean;
|
|
272
|
+
}>> {
|
|
273
|
+
try {
|
|
274
|
+
const { listAvailableModels } = await import('@sparkleideas/agentic-flow/embeddings');
|
|
275
|
+
return listAvailableModels();
|
|
276
|
+
} catch {
|
|
277
|
+
// Return default models if @sparkleideas/agentic-flow not available
|
|
278
|
+
return [
|
|
279
|
+
{ id: 'all-MiniLM-L6-v2', dimension: 384, size: '23MB', quantized: false, downloaded: false },
|
|
280
|
+
{ id: 'all-mpnet-base-v2', dimension: 768, size: '110MB', quantized: false, downloaded: false },
|
|
281
|
+
];
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Download embedding model
|
|
287
|
+
*/
|
|
288
|
+
export async function downloadEmbeddingModel(
|
|
289
|
+
modelId: string,
|
|
290
|
+
targetDir?: string,
|
|
291
|
+
onProgress?: (progress: { percent: number; bytesDownloaded: number; totalBytes: number }) => void
|
|
292
|
+
): Promise<string> {
|
|
293
|
+
const { downloadModel } = await import('@sparkleideas/agentic-flow/embeddings');
|
|
294
|
+
return downloadModel(modelId, targetDir ?? '.models', onProgress);
|
|
295
|
+
}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Normalization Utilities
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - L2 (Euclidean) normalization
|
|
6
|
+
* - L1 (Manhattan) normalization
|
|
7
|
+
* - Min-max normalization
|
|
8
|
+
* - Z-score standardization
|
|
9
|
+
* - Batch normalization
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Normalization strategy identifier ('none' skips scaling entirely).
 */
export type NormalizationType = 'l2' | 'l1' | 'minmax' | 'zscore' | 'none';

/**
 * Options accepted by normalize() / normalizeBatch().
 */
export interface NormalizationOptions {
  /** Normalization type (default: 'l2') */
  type?: NormalizationType;
  /** Epsilon for numerical stability (default: 1e-12) */
  epsilon?: number;
  /** In-place modification (default: false). Only honored for type 'l2' with a Float32Array input — see normalize(); other combinations always allocate a new array. */
  inPlace?: boolean;
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* L2 (Euclidean) normalize embedding to unit length
|
|
31
|
+
* Most common for cosine similarity
|
|
32
|
+
*
|
|
33
|
+
* @param embedding - Input embedding vector
|
|
34
|
+
* @param epsilon - Small value to prevent division by zero
|
|
35
|
+
* @returns Normalized embedding with ||v|| = 1
|
|
36
|
+
*/
|
|
37
|
+
export function l2Normalize(
|
|
38
|
+
embedding: Float32Array | number[],
|
|
39
|
+
epsilon = 1e-12
|
|
40
|
+
): Float32Array {
|
|
41
|
+
const result = embedding instanceof Float32Array
|
|
42
|
+
? new Float32Array(embedding.length)
|
|
43
|
+
: new Float32Array(embedding.length);
|
|
44
|
+
|
|
45
|
+
// Calculate L2 norm (Euclidean length)
|
|
46
|
+
let sumSquares = 0;
|
|
47
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
48
|
+
sumSquares += embedding[i] * embedding[i];
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const norm = Math.sqrt(sumSquares);
|
|
52
|
+
const scale = norm > epsilon ? 1 / norm : 0;
|
|
53
|
+
|
|
54
|
+
// Normalize
|
|
55
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
56
|
+
result[i] = embedding[i] * scale;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return result;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* L2 normalize embedding in-place (modifies original array)
|
|
64
|
+
*/
|
|
65
|
+
export function l2NormalizeInPlace(
|
|
66
|
+
embedding: Float32Array,
|
|
67
|
+
epsilon = 1e-12
|
|
68
|
+
): Float32Array {
|
|
69
|
+
let sumSquares = 0;
|
|
70
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
71
|
+
sumSquares += embedding[i] * embedding[i];
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const norm = Math.sqrt(sumSquares);
|
|
75
|
+
const scale = norm > epsilon ? 1 / norm : 0;
|
|
76
|
+
|
|
77
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
78
|
+
embedding[i] *= scale;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return embedding;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* L1 (Manhattan) normalize embedding
|
|
86
|
+
* Sum of absolute values = 1
|
|
87
|
+
*/
|
|
88
|
+
export function l1Normalize(
|
|
89
|
+
embedding: Float32Array | number[],
|
|
90
|
+
epsilon = 1e-12
|
|
91
|
+
): Float32Array {
|
|
92
|
+
const result = new Float32Array(embedding.length);
|
|
93
|
+
|
|
94
|
+
// Calculate L1 norm (sum of absolute values)
|
|
95
|
+
let sumAbs = 0;
|
|
96
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
97
|
+
sumAbs += Math.abs(embedding[i]);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
const scale = sumAbs > epsilon ? 1 / sumAbs : 0;
|
|
101
|
+
|
|
102
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
103
|
+
result[i] = embedding[i] * scale;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return result;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Min-max normalize embedding to [0, 1] range
|
|
111
|
+
*/
|
|
112
|
+
export function minMaxNormalize(
|
|
113
|
+
embedding: Float32Array | number[],
|
|
114
|
+
epsilon = 1e-12
|
|
115
|
+
): Float32Array {
|
|
116
|
+
const result = new Float32Array(embedding.length);
|
|
117
|
+
|
|
118
|
+
// Find min and max
|
|
119
|
+
let min = Infinity;
|
|
120
|
+
let max = -Infinity;
|
|
121
|
+
|
|
122
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
123
|
+
if (embedding[i] < min) min = embedding[i];
|
|
124
|
+
if (embedding[i] > max) max = embedding[i];
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
const range = max - min;
|
|
128
|
+
const scale = range > epsilon ? 1 / range : 0;
|
|
129
|
+
|
|
130
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
131
|
+
result[i] = (embedding[i] - min) * scale;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
return result;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Z-score standardize embedding (mean=0, std=1)
|
|
139
|
+
*/
|
|
140
|
+
export function zScoreNormalize(
|
|
141
|
+
embedding: Float32Array | number[],
|
|
142
|
+
epsilon = 1e-12
|
|
143
|
+
): Float32Array {
|
|
144
|
+
const result = new Float32Array(embedding.length);
|
|
145
|
+
const n = embedding.length;
|
|
146
|
+
|
|
147
|
+
// Calculate mean
|
|
148
|
+
let sum = 0;
|
|
149
|
+
for (let i = 0; i < n; i++) {
|
|
150
|
+
sum += embedding[i];
|
|
151
|
+
}
|
|
152
|
+
const mean = sum / n;
|
|
153
|
+
|
|
154
|
+
// Calculate standard deviation
|
|
155
|
+
let sumSquaredDiff = 0;
|
|
156
|
+
for (let i = 0; i < n; i++) {
|
|
157
|
+
const diff = embedding[i] - mean;
|
|
158
|
+
sumSquaredDiff += diff * diff;
|
|
159
|
+
}
|
|
160
|
+
const std = Math.sqrt(sumSquaredDiff / n);
|
|
161
|
+
const scale = std > epsilon ? 1 / std : 0;
|
|
162
|
+
|
|
163
|
+
// Standardize
|
|
164
|
+
for (let i = 0; i < n; i++) {
|
|
165
|
+
result[i] = (embedding[i] - mean) * scale;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
return result;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Normalize embedding using specified method
|
|
173
|
+
*/
|
|
174
|
+
export function normalize(
|
|
175
|
+
embedding: Float32Array | number[],
|
|
176
|
+
options: NormalizationOptions = {}
|
|
177
|
+
): Float32Array {
|
|
178
|
+
const { type = 'l2', epsilon = 1e-12, inPlace = false } = options;
|
|
179
|
+
|
|
180
|
+
if (type === 'none') {
|
|
181
|
+
return embedding instanceof Float32Array
|
|
182
|
+
? embedding
|
|
183
|
+
: new Float32Array(embedding);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
if (inPlace && embedding instanceof Float32Array && type === 'l2') {
|
|
187
|
+
return l2NormalizeInPlace(embedding, epsilon);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
switch (type) {
|
|
191
|
+
case 'l2':
|
|
192
|
+
return l2Normalize(embedding, epsilon);
|
|
193
|
+
case 'l1':
|
|
194
|
+
return l1Normalize(embedding, epsilon);
|
|
195
|
+
case 'minmax':
|
|
196
|
+
return minMaxNormalize(embedding, epsilon);
|
|
197
|
+
case 'zscore':
|
|
198
|
+
return zScoreNormalize(embedding, epsilon);
|
|
199
|
+
default:
|
|
200
|
+
return l2Normalize(embedding, epsilon);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* Batch normalize multiple embeddings
|
|
206
|
+
*/
|
|
207
|
+
export function normalizeBatch(
|
|
208
|
+
embeddings: Array<Float32Array | number[]>,
|
|
209
|
+
options: NormalizationOptions = {}
|
|
210
|
+
): Float32Array[] {
|
|
211
|
+
return embeddings.map(emb => normalize(emb, options));
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* Calculate L2 norm of embedding
|
|
216
|
+
*/
|
|
217
|
+
export function l2Norm(embedding: Float32Array | number[]): number {
|
|
218
|
+
let sumSquares = 0;
|
|
219
|
+
for (let i = 0; i < embedding.length; i++) {
|
|
220
|
+
sumSquares += embedding[i] * embedding[i];
|
|
221
|
+
}
|
|
222
|
+
return Math.sqrt(sumSquares);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Check if embedding is already normalized (L2 norm ≈ 1)
|
|
227
|
+
*/
|
|
228
|
+
export function isNormalized(
|
|
229
|
+
embedding: Float32Array | number[],
|
|
230
|
+
tolerance = 1e-6
|
|
231
|
+
): boolean {
|
|
232
|
+
const norm = l2Norm(embedding);
|
|
233
|
+
return Math.abs(norm - 1) < tolerance;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Center embeddings by subtracting mean across batch
|
|
238
|
+
* Useful for improving similarity metrics
|
|
239
|
+
*/
|
|
240
|
+
export function centerEmbeddings(
|
|
241
|
+
embeddings: Array<Float32Array | number[]>
|
|
242
|
+
): Float32Array[] {
|
|
243
|
+
if (embeddings.length === 0) return [];
|
|
244
|
+
|
|
245
|
+
const dim = embeddings[0].length;
|
|
246
|
+
const n = embeddings.length;
|
|
247
|
+
|
|
248
|
+
// Calculate mean for each dimension
|
|
249
|
+
const mean = new Float32Array(dim);
|
|
250
|
+
for (const emb of embeddings) {
|
|
251
|
+
for (let i = 0; i < dim; i++) {
|
|
252
|
+
mean[i] += emb[i];
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
for (let i = 0; i < dim; i++) {
|
|
256
|
+
mean[i] /= n;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Subtract mean from each embedding
|
|
260
|
+
return embeddings.map(emb => {
|
|
261
|
+
const centered = new Float32Array(dim);
|
|
262
|
+
for (let i = 0; i < dim; i++) {
|
|
263
|
+
centered[i] = emb[i] - mean[i];
|
|
264
|
+
}
|
|
265
|
+
return centered;
|
|
266
|
+
});
|
|
267
|
+
}
|