@sparkleideas/embeddings 3.0.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,458 @@
1
+ /**
2
+ * Hyperbolic Embedding Utilities
3
+ *
4
+ * Convert Euclidean embeddings to hyperbolic (Poincaré ball) space
5
+ * for better representation of hierarchical relationships.
6
+ *
7
+ * Features:
8
+ * - Euclidean to Poincaré ball conversion
9
+ * - Hyperbolic distance metrics
10
+ * - Mobius operations (addition, scalar multiplication)
11
+ * - Exponential and logarithmic maps
12
+ *
13
+ * References:
14
+ * - Nickel & Kiela (2017): "Poincaré Embeddings for Learning Hierarchical Representations"
15
+ * - Ganea et al. (2018): "Hyperbolic Neural Networks"
16
+ */
17
+
18
/**
 * Options accepted by every hyperbolic helper in this module.
 *
 * All fields are optional; anything left out falls back to `DEFAULT_CONFIG`.
 */
export interface HyperbolicConfig {
  /**
   * Curvature of hyperbolic space (default: -1).
   *
   * The helpers in this module read it through `Math.abs`, so only the
   * magnitude influences the result.
   */
  curvature?: number;

  /**
   * Epsilon for numerical stability (default: 1e-15).
   *
   * Used as the "norm is effectively zero" threshold and as a lower bound
   * for denominators.
   */
  epsilon?: number;

  /**
   * Maximum norm to prevent numerical issues (default: 1 - 1e-5).
   *
   * Results whose norm exceeds this bound are rescaled back under it.
   */
  maxNorm?: number;
}

/**
 * Fallback values merged underneath any caller-supplied configuration.
 */
const DEFAULT_CONFIG: Required<HyperbolicConfig> = {
  curvature: -1,
  epsilon: 1e-15,
  maxNorm: 1 - 1e-5,
};
35
+
36
/**
 * Euclidean (L2) length of a vector: the square root of the sum of its
 * squared components. An empty vector has length 0.
 */
function l2Norm(v: Float32Array | number[]): number {
  let sumOfSquares = 0;
  for (const component of v) {
    sumOfSquares += component * component;
  }
  return Math.sqrt(sumOfSquares);
}
46
+
47
/**
 * Shrink a vector, in place, so that its L2 norm does not exceed `maxNorm`.
 *
 * A vector whose norm is already at or below `maxNorm` is left untouched.
 * Otherwise every component is multiplied by `(maxNorm - epsilon) / norm`.
 *
 * @param v - Vector to clamp; it is mutated and returned
 * @param maxNorm - Largest norm allowed
 * @param epsilon - Margin subtracted from `maxNorm` when rescaling
 * @returns The same array instance that was passed in
 */
function clampNorm(
  v: Float32Array,
  maxNorm: number,
  epsilon: number
): Float32Array {
  const currentNorm = l2Norm(v);

  // Nothing to do for vectors already inside the bound (or with a NaN norm).
  if (!(currentNorm > maxNorm)) {
    return v;
  }

  const shrink = (maxNorm - epsilon) / currentNorm;
  for (let k = 0; k < v.length; k++) {
    v[k] = v[k] * shrink;
  }
  return v;
}
64
+
65
/**
 * Convert Euclidean embedding to Poincaré ball
 *
 * Projects the vector with the map
 * `tanh(sqrt(c) * ||v|| / 2) * v / (sqrt(c) * ||v||)`, where `c` is the
 * magnitude of the configured curvature, then clamps the result so it stays
 * strictly inside the ball.
 *
 * The ball of curvature magnitude `c` has radius `1 / sqrt(c)`, so the clamp
 * bound is `maxNorm / sqrt(c)`: `maxNorm` acts as a fraction of the radius.
 * For the default curvature (-1) this is exactly `maxNorm`.
 *
 * A curvature of 0 is not supported: the scale factor evaluates to `0 / 0`.
 *
 * @param euclidean - Euclidean embedding vector
 * @param config - Hyperbolic geometry configuration
 * @returns Poincaré ball embedding (a newly allocated array)
 */
export function euclideanToPoincare(
  euclidean: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const sqrtC = Math.sqrt(Math.abs(curvature));

  const result = new Float32Array(euclidean.length);
  const norm = l2Norm(euclidean);

  if (norm < epsilon) {
    // Near origin, return as-is (origin maps to origin)
    result.set(euclidean);
    return result;
  }

  const scaledNorm = sqrtC * norm;
  const factor = Math.tanh(scaledNorm / 2) / scaledNorm;

  for (let i = 0; i < euclidean.length; i++) {
    result[i] = euclidean[i] * factor;
  }

  // tanh saturates to exactly 1 for large inputs, which would put the point
  // on the boundary ||x|| = 1 / sqrt(c); clamp relative to that radius.
  return clampNorm(result, maxNorm / sqrtC, epsilon);
}
103
+
104
/**
 * Convert Poincaré ball embedding back to Euclidean
 *
 * Applies `2 * atanh(sqrt(c) * ||y||) * y / (sqrt(c) * ||y||)`, where `c` is
 * the magnitude of the configured curvature. This is the inverse of the
 * projection used by `euclideanToPoincare`.
 *
 * `atanh` is infinite at 1 and NaN above it, so the argument is capped at
 * `maxNorm`. Points on or beyond the ball boundary therefore map to a large
 * finite vector instead of `Infinity`/`NaN` components.
 *
 * @param poincare - Poincaré ball embedding
 * @param config - Hyperbolic geometry configuration
 * @returns Euclidean embedding vector (a newly allocated array)
 */
export function poincareToEuclidean(
  poincare: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const sqrtC = Math.sqrt(Math.abs(curvature));

  const result = new Float32Array(poincare.length);
  const norm = l2Norm(poincare);

  if (norm < epsilon) {
    // Near origin the map is the identity to within epsilon.
    result.set(poincare);
    return result;
  }

  const scaledNorm = sqrtC * norm;
  // Keep the atanh argument strictly below 1 so the factor stays finite.
  const atanhArg = Math.min(scaledNorm, maxNorm);
  const factor = (2 * Math.atanh(atanhArg)) / scaledNorm;

  for (let i = 0; i < poincare.length; i++) {
    result[i] = poincare[i] * factor;
  }

  return result;
}
140
+
141
/**
 * Geodesic distance between two points of the Poincaré ball.
 *
 * Evaluates
 * `d(a, b) = (1/sqrt(c)) * arcosh(1 + 2c * ||a-b||^2 / ((1 - c*||a||^2)(1 - c*||b||^2)))`
 * with `c` the magnitude of the configured curvature.
 *
 * @param a - First Poincaré embedding
 * @param b - Second Poincaré embedding
 * @param config - Hyperbolic geometry configuration
 * @returns Hyperbolic distance
 * @throws Error when the two embeddings differ in length
 */
export function hyperbolicDistance(
  a: Float32Array | number[],
  b: Float32Array | number[],
  config: HyperbolicConfig = {}
): number {
  if (a.length !== b.length) {
    throw new Error('Embeddings must have same dimension');
  }

  const settings = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(settings.curvature);

  // Accumulate ||a - b||^2, ||a||^2 and ||b||^2 in a single pass.
  let sqDistance = 0;
  let sqNormA = 0;
  let sqNormB = 0;
  for (let k = 0; k < a.length; k++) {
    const ak = a[k];
    const bk = b[k];
    const delta = ak - bk;
    sqDistance += delta * delta;
    sqNormA += ak * ak;
    sqNormB += bk * bk;
  }

  const conformalProduct = (1 - c * sqNormA) * (1 - c * sqNormB);
  // The denominator is floored at epsilon to avoid dividing by zero.
  const ratio = (2 * c * sqDistance) / Math.max(conformalProduct, settings.epsilon);

  // arcosh is only defined for arguments >= 1.
  const arcoshArg = Math.max(1, 1 + ratio);

  return Math.acosh(arcoshArg) / Math.sqrt(c);
}
189
+
190
/**
 * Möbius addition in Poincaré ball
 *
 * Computes
 * `a ⊕ b = ((1 + 2c<a,b> + c||b||^2) a + (1 - c||a||^2) b) / (1 + 2c<a,b> + c^2 ||a||^2 ||b||^2)`
 * with `c` the magnitude of the configured curvature. The denominator is
 * floored at `epsilon`.
 *
 * The result is clamped to norm `maxNorm / sqrt(c)`, i.e. relative to the
 * ball radius `1 / sqrt(c)`. For the default curvature (-1) this equals
 * `maxNorm`.
 *
 * @param a - First vector
 * @param b - Second vector
 * @param config - Configuration
 * @returns a ⊕ b in hyperbolic space (a newly allocated array)
 * @throws Error when the two vectors differ in length
 */
export function mobiusAdd(
  a: Float32Array | number[],
  b: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);

  if (a.length !== b.length) {
    throw new Error('Vectors must have same dimension');
  }

  // ||a||^2, ||b||^2 and <a, b> in one pass.
  let normASq = 0;
  let normBSq = 0;
  let dotAB = 0;
  for (let i = 0; i < a.length; i++) {
    normASq += a[i] * a[i];
    normBSq += b[i] * b[i];
    dotAB += a[i] * b[i];
  }

  // Möbius addition formula
  const numeratorCoeffA = 1 + 2 * c * dotAB + c * normBSq;
  const numeratorCoeffB = 1 - c * normASq;
  const denominator = 1 + 2 * c * dotAB + c * c * normASq * normBSq;
  // Loop-invariant: floor the denominator once rather than per component.
  const safeDenominator = Math.max(denominator, epsilon);

  const result = new Float32Array(a.length);
  for (let i = 0; i < a.length; i++) {
    result[i] = (numeratorCoeffA * a[i] + numeratorCoeffB * b[i]) / safeDenominator;
  }

  // The ball radius is 1 / sqrt(c), so the clamp bound scales with it.
  return clampNorm(result, maxNorm / Math.sqrt(c), epsilon);
}
237
+
238
/**
 * Möbius scalar multiplication in Poincaré ball
 *
 * Computes `r ⊗ v = tanh(r * atanh(sqrt(c) * ||v||)) * v / (sqrt(c) * ||v||)`
 * with `c` the magnitude of the configured curvature. A vector whose norm is
 * below `epsilon` yields the zero vector.
 *
 * The `atanh` argument is capped at `maxNorm`: without the cap a vector on
 * the boundary gives `atanh(1) = Infinity` (and `0 * Infinity = NaN` when
 * `r` is 0), and a vector beyond it gives `NaN`. The result is clamped to
 * norm `maxNorm / sqrt(c)`, i.e. relative to the ball radius.
 *
 * @param r - Scalar
 * @param v - Vector in Poincaré ball
 * @param config - Configuration
 * @returns r ⊗ v in hyperbolic space (a newly allocated array)
 */
export function mobiusScalarMul(
  r: number,
  v: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const sqrtC = Math.sqrt(Math.abs(curvature));

  const norm = l2Norm(v);

  if (norm < epsilon) {
    return new Float32Array(v.length);
  }

  const scaledNorm = sqrtC * norm;
  // Keep atanh finite for inputs on or outside the ball boundary.
  const atanhArg = Math.min(scaledNorm, maxNorm);
  const factor = Math.tanh(r * Math.atanh(atanhArg)) / scaledNorm;

  const result = new Float32Array(v.length);
  for (let i = 0; i < v.length; i++) {
    result[i] = v[i] * factor;
  }

  return clampNorm(result, maxNorm / sqrtC, epsilon);
}
271
+
272
/**
 * Compute hyperbolic centroid (Fréchet mean) of multiple points
 *
 * Starts from the component-wise mean of the inputs passed through
 * `euclideanToPoincare`, then repeatedly:
 *   1. sums the log maps (taken at the current centroid) of every point,
 *   2. stops if the norm of that sum is below `epsilon`,
 *   3. otherwise moves the centroid along the averaged direction with the
 *      exponential map.
 *
 * A single input point is returned as a copy without iterating.
 *
 * @param points - Array of Poincaré embeddings
 * @param config - Configuration
 * @param maxIter - Maximum iterations (default: 100)
 * @returns Hyperbolic centroid (a newly allocated array)
 * @throws Error when `points` is empty
 */
export function hyperbolicCentroid(
  points: Array<Float32Array | number[]>,
  config: HyperbolicConfig = {},
  maxIter = 100
): Float32Array {
  if (points.length === 0) {
    throw new Error('Need at least one point');
  }
  if (points.length === 1) {
    return new Float32Array(points[0]);
  }

  const { epsilon } = { ...DEFAULT_CONFIG, ...config };
  const dim = points[0].length;
  const count = points.length;

  // Initialize centroid at Euclidean mean projected to ball.
  // The original (unconverted) inputs are summed here so the starting point
  // is not affected by the float32 conversion below.
  const centroidInit = new Float32Array(dim);
  for (const p of points) {
    for (let i = 0; i < dim; i++) {
      centroidInit[i] += p[i];
    }
  }
  for (let i = 0; i < dim; i++) {
    centroidInit[i] /= count;
  }
  let centroid = euclideanToPoincare(centroidInit, config);

  // Convert plain arrays once, instead of on every iteration.
  const typedPoints = points.map((p) =>
    p instanceof Float32Array ? p : new Float32Array(p)
  );

  // Iterative refinement using Karcher mean algorithm
  for (let iter = 0; iter < maxIter; iter++) {
    const gradient = new Float32Array(dim);

    for (const point of typedPoints) {
      // Log map from centroid to point
      const logMap = logMapAt(centroid, point, config);
      for (let i = 0; i < dim; i++) {
        gradient[i] += logMap[i];
      }
    }

    // Check convergence (on the summed, not yet averaged, direction)
    if (l2Norm(gradient) < epsilon) break;

    // Update centroid using exponential map
    for (let i = 0; i < dim; i++) {
      gradient[i] /= count;
    }
    centroid = expMapAt(centroid, gradient, config);
  }

  return centroid;
}
348
+
349
/**
 * Exponential map at point p
 *
 * Moves from `p` along tangent vector `v`:
 * `p ⊕ (tanh(sqrt(c) * λ_p * ||v|| / 2) * v / (sqrt(c) * ||v||))`,
 * where `λ_p = 2 / (1 - c * ||p||^2)` and `c` is the magnitude of the
 * configured curvature. A tangent vector with norm below `epsilon` returns a
 * copy of `p`.
 *
 * The result is clamped to norm `maxNorm / sqrt(c)`, i.e. relative to the
 * ball radius `1 / sqrt(c)`. For the default curvature (-1) this equals
 * `maxNorm`.
 *
 * @param p - Base point in the Poincaré ball
 * @param v - Tangent vector at `p`
 * @param config - Configuration
 * @returns The point reached from `p` (a newly allocated array)
 */
function expMapAt(
  p: Float32Array,
  v: Float32Array,
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);
  const sqrtC = Math.sqrt(c);

  const normV = l2Norm(v);
  if (normV < epsilon) {
    // Zero step: stay at p, but hand back a copy rather than p itself.
    return new Float32Array(p);
  }

  const normP = l2Norm(p);
  const lambdaP = 2 / (1 - c * normP * normP);

  const tanhArg = (sqrtC * lambdaP * normV) / 2;
  const coeff = Math.tanh(tanhArg) / (sqrtC * normV);

  const scaledV = new Float32Array(v.length);
  for (let i = 0; i < v.length; i++) {
    scaledV[i] = v[i] * coeff;
  }

  return clampNorm(mobiusAdd(p, scaledV, config), maxNorm / sqrtC, epsilon);
}
379
+
380
/**
 * Logarithmic map at point p
 *
 * Returns the tangent vector at `p` pointing towards `q`:
 * `(2 / (sqrt(c) * λ_p)) * atanh(sqrt(c) * ||d||) * d / ||d||`,
 * where `d = (-p) ⊕ q`, `λ_p = 2 / (1 - c * ||p||^2)` and `c` is the
 * magnitude of the configured curvature. When `||d||` is below `epsilon`
 * the zero vector is returned.
 *
 * The `atanh` argument is capped at `maxNorm` so the coefficient stays
 * finite when `sqrt(c) * ||d||` reaches or exceeds 1.
 *
 * @param p - Base point in the Poincaré ball
 * @param q - Target point in the Poincaré ball
 * @param config - Configuration
 * @returns Tangent vector at `p` (a newly allocated array)
 */
function logMapAt(
  p: Float32Array,
  q: Float32Array,
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);
  const sqrtC = Math.sqrt(c);

  // -p ⊕ q
  const negP = new Float32Array(p.length);
  for (let i = 0; i < p.length; i++) {
    negP[i] = -p[i];
  }
  const diff = mobiusAdd(negP, q, config);

  const normDiff = l2Norm(diff);
  if (normDiff < epsilon) {
    return new Float32Array(p.length);
  }

  const normP = l2Norm(p);
  const lambdaP = 2 / (1 - c * normP * normP);

  // atanh is Infinity at 1 and NaN beyond it; keep the argument below 1.
  const atanhArg = Math.min(sqrtC * normDiff, maxNorm);
  const coeff = ((2 / (sqrtC * lambdaP)) * Math.atanh(atanhArg)) / normDiff;

  const result = new Float32Array(diff.length);
  for (let i = 0; i < diff.length; i++) {
    result[i] = diff[i] * coeff;
  }

  return result;
}
416
+
417
/**
 * Project a list of Euclidean embeddings into the Poincaré ball.
 *
 * Each embedding is converted independently with `euclideanToPoincare`,
 * using the same configuration for all of them.
 *
 * @param embeddings - Euclidean embedding vectors
 * @param config - Hyperbolic geometry configuration
 * @returns One Poincaré embedding per input, in the same order
 */
export function batchEuclideanToPoincare(
  embeddings: Array<Float32Array | number[]>,
  config: HyperbolicConfig = {}
): Float32Array[] {
  const project = (embedding: Float32Array | number[]): Float32Array =>
    euclideanToPoincare(embedding, config);

  return embeddings.map(project);
}
426
+
427
/**
 * Hyperbolic distance between every unordered pair of embeddings.
 *
 * The result is a flat array of length `n * (n - 1) / 2` holding the
 * distances for pairs `(i, j)` with `i < j`, ordered by `i` and then by `j`.
 *
 * @param embeddings - Poincaré embeddings
 * @param config - Hyperbolic geometry configuration
 * @returns Flat array of pairwise distances
 */
export function pairwiseHyperbolicDistances(
  embeddings: Float32Array[],
  config: HyperbolicConfig = {}
): Float32Array {
  const count = embeddings.length;
  const condensed = new Float32Array((count * (count - 1)) / 2);

  let cursor = 0;
  for (let row = 0; row < count; row++) {
    const rowEmbedding = embeddings[row];
    for (let col = row + 1; col < count; col++) {
      condensed[cursor] = hyperbolicDistance(rowEmbedding, embeddings[col], config);
      cursor += 1;
    }
  }

  return condensed;
}
446
+
447
/**
 * Tell whether a vector lies strictly inside the Poincaré ball.
 *
 * The ball radius is `1 / sqrt(c)`, with `c` the magnitude of the configured
 * curvature; a vector exactly on the boundary is reported as outside.
 *
 * @param v - Vector to test
 * @param config - Hyperbolic geometry configuration
 * @returns `true` when the L2 norm of `v` is below the ball radius
 */
export function isInPoincareBall(
  v: Float32Array | number[],
  config: HyperbolicConfig = {}
): boolean {
  const settings = { ...DEFAULT_CONFIG, ...config };
  const ballRadius = 1 / Math.sqrt(Math.abs(settings.curvature));
  return l2Norm(v) < ballRadius;
}
package/src/index.ts ADDED
@@ -0,0 +1,116 @@
1
+ /**
2
+ * V3 Embedding Service Module
3
+ *
4
+ * Production embedding service aligned with @sparkleideas/agentic-flow@alpha:
5
+ * - OpenAI provider (text-embedding-3-small/large)
6
+ * - Transformers.js provider (local ONNX models)
7
+ * - Agentic-flow provider (optimized ONNX with SIMD)
8
+ * - Mock provider (development/testing)
9
+ *
10
+ * Additional features:
11
+ * - Persistent SQLite cache
12
+ * - Document chunking with overlap
13
+ * - L2/L1/minmax/zscore normalization
14
+ * - Hyperbolic embeddings (Poincaré ball)
15
+ * - Neural substrate integration (drift, memory, swarm)
16
+ *
17
+ * @module @sparkleideas/embeddings
18
+ */
19
+
20
+ export * from './types.js';
21
+ export * from './embedding-service.js';
22
+
23
+ // Re-export commonly used items at top level
24
+ export {
25
+ createEmbeddingService,
26
+ createEmbeddingServiceAsync,
27
+ getEmbedding,
28
+ cosineSimilarity,
29
+ euclideanDistance,
30
+ dotProduct,
31
+ computeSimilarity,
32
+ OpenAIEmbeddingService,
33
+ TransformersEmbeddingService,
34
+ MockEmbeddingService,
35
+ AgenticFlowEmbeddingService,
36
+ } from './embedding-service.js';
37
+
38
+ export type { AutoEmbeddingConfig } from './embedding-service.js';
39
+
40
+ // Chunking utilities
41
+ export {
42
+ chunkText,
43
+ estimateTokens,
44
+ reconstructFromChunks,
45
+ type ChunkingConfig,
46
+ type Chunk,
47
+ type ChunkedDocument,
48
+ } from './chunking.js';
49
+
50
+ // Normalization utilities
51
+ export {
52
+ l2Normalize,
53
+ l2NormalizeInPlace,
54
+ l1Normalize,
55
+ minMaxNormalize,
56
+ zScoreNormalize,
57
+ normalize,
58
+ normalizeBatch,
59
+ l2Norm,
60
+ isNormalized,
61
+ centerEmbeddings,
62
+ type NormalizationOptions,
63
+ } from './normalization.js';
64
+
65
+ // Hyperbolic embeddings (Poincaré ball)
66
+ export {
67
+ euclideanToPoincare,
68
+ poincareToEuclidean,
69
+ hyperbolicDistance,
70
+ mobiusAdd,
71
+ mobiusScalarMul,
72
+ hyperbolicCentroid,
73
+ batchEuclideanToPoincare,
74
+ pairwiseHyperbolicDistances,
75
+ isInPoincareBall,
76
+ type HyperbolicConfig,
77
+ } from './hyperbolic.js';
78
+
79
+ // Persistent cache
80
+ export {
81
+ PersistentEmbeddingCache,
82
+ isPersistentCacheAvailable,
83
+ type PersistentCacheConfig as DiskCacheConfig,
84
+ type PersistentCacheStats,
85
+ } from './persistent-cache.js';
86
+
87
+ // Neural substrate integration
88
+ export {
89
+ NeuralEmbeddingService,
90
+ createNeuralService,
91
+ isNeuralAvailable,
92
+ listEmbeddingModels,
93
+ downloadEmbeddingModel,
94
+ type DriftResult,
95
+ type MemoryEntry,
96
+ type AgentState,
97
+ type CoherenceResult,
98
+ type SubstrateHealth,
99
+ type NeuralSubstrateConfig,
100
+ } from './neural-integration.js';
101
+
102
+ export type {
103
+ EmbeddingProvider,
104
+ EmbeddingConfig,
105
+ OpenAIEmbeddingConfig,
106
+ TransformersEmbeddingConfig,
107
+ MockEmbeddingConfig,
108
+ AgenticFlowEmbeddingConfig,
109
+ EmbeddingResult,
110
+ BatchEmbeddingResult,
111
+ IEmbeddingService,
112
+ SimilarityMetric,
113
+ SimilarityResult,
114
+ NormalizationType,
115
+ PersistentCacheConfig,
116
+ } from './types.js';