@sparkleideas/embeddings 3.0.0-alpha.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +651 -0
- package/package.json +66 -0
- package/src/__tests__/embedding-service.test.ts +126 -0
- package/src/chunking.ts +351 -0
- package/src/embedding-service.ts +1136 -0
- package/src/hyperbolic.ts +458 -0
- package/src/index.ts +116 -0
- package/src/neural-integration.ts +295 -0
- package/src/normalization.ts +267 -0
- package/src/persistent-cache.ts +410 -0
- package/src/types.ts +282 -0
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hyperbolic Embedding Utilities
|
|
3
|
+
*
|
|
4
|
+
* Convert Euclidean embeddings to hyperbolic (Poincaré ball) space
|
|
5
|
+
* for better representation of hierarchical relationships.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Euclidean to Poincaré ball conversion
|
|
9
|
+
* - Hyperbolic distance metrics
|
|
10
|
+
* - Möbius operations (addition, scalar multiplication)
|
|
11
|
+
* - Exponential and logarithmic maps
|
|
12
|
+
*
|
|
13
|
+
* References:
|
|
14
|
+
* - Nickel & Kiela (2017): "Poincaré Embeddings for Learning Hierarchical Representations"
|
|
15
|
+
* - Ganea et al. (2018): "Hyperbolic Neural Networks"
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/**
 * Options accepted by the hyperbolic (Poincaré ball) helpers in this module.
 *
 * Every field is optional; the helpers merge the supplied object over the
 * module defaults, so omitted fields take the default shown below.
 */
export interface HyperbolicConfig {
  /**
   * Curvature of hyperbolic space (default: -1).
   * The helpers apply `Math.abs` to this value, so only its magnitude is used.
   */
  curvature?: number;
  /** Epsilon for numerical stability (default: 1e-15) */
  epsilon?: number;
  /** Maximum norm to prevent numerical issues (default: 1 - 1e-5) */
  maxNorm?: number;
}
|
|
29
|
+
|
|
30
|
+
// Fallback values; each helper spreads the caller's config over this object.
const DEFAULT_CONFIG: Required<HyperbolicConfig> = {
  curvature: -1,
  epsilon: 1e-15,
  maxNorm: 1 - 1e-5,
};
|
|
35
|
+
|
|
36
|
+
/**
 * Euclidean (L2) length of a vector.
 *
 * @param v - Vector components
 * @returns Square root of the sum of squared components (0 for an empty vector)
 */
function l2Norm(v: Float32Array | number[]): number {
  let sumOfSquares = 0;
  for (const component of v) {
    sumOfSquares += component * component;
  }
  return Math.sqrt(sumOfSquares);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Shrink a vector whose L2 norm exceeds `maxNorm`.
 *
 * The array is rescaled IN PLACE so that its norm becomes `maxNorm - epsilon`;
 * vectors already within the bound are left untouched.
 *
 * @param v - Vector to clamp (mutated when rescaling is needed)
 * @param maxNorm - Norm above which the vector is rescaled
 * @param epsilon - Margin subtracted from `maxNorm` to obtain the target norm
 * @returns The same array instance that was passed in
 */
function clampNorm(
  v: Float32Array,
  maxNorm: number,
  epsilon: number
): Float32Array {
  const currentNorm = l2Norm(v);

  // Written as a negated ">" so a NaN norm falls through unchanged.
  if (!(currentNorm > maxNorm)) {
    return v;
  }

  const shrink = (maxNorm - epsilon) / currentNorm;
  v.forEach((value, index) => {
    v[index] = value * shrink;
  });
  return v;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Convert Euclidean embedding to Poincaré ball
 *
 * Applies the map `v -> tanh(sqrt(c) * ||v|| / 2) * v / (sqrt(c) * ||v||)`
 * with `c = |curvature|`, which is the inverse of `poincareToEuclidean`.
 * Vectors whose norm is below `epsilon` are copied through unchanged.
 *
 * The result is clamped so its norm stays below `maxNorm / sqrt(c)`: the
 * Poincaré ball of curvature `-c` has radius `1 / sqrt(c)`, so `maxNorm` is
 * applied as a fraction of that radius (identical to the raw value when c = 1).
 *
 * @param euclidean - Euclidean embedding vector
 * @param config - Hyperbolic geometry configuration
 * @returns Poincaré ball embedding (always a newly allocated array)
 */
export function euclideanToPoincare(
  euclidean: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);
  const sqrtC = Math.sqrt(c);

  const result = new Float32Array(euclidean.length);
  const norm = l2Norm(euclidean);

  if (norm < epsilon) {
    // Near origin, return as-is (origin maps to origin)
    result.set(euclidean);
    return result;
  }

  const factor = Math.tanh(sqrtC * norm / 2) / (sqrtC * norm);

  for (let i = 0; i < euclidean.length; i++) {
    result[i] = euclidean[i] * factor;
  }

  // Scale the bound by the ball radius; a fixed bound near 1 squashes valid
  // points when c < 1 and fails to keep points inside the ball when c > 1.
  return clampNorm(result, maxNorm / sqrtC, epsilon);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Map a Poincaré ball embedding back to Euclidean space.
 *
 * Scales the input by `2 * atanh(sqrt(c) * ||y||) / (sqrt(c) * ||y||)` with
 * `c = |curvature|`. Inputs whose norm is below `epsilon` are copied through
 * unchanged.
 *
 * @param poincare - Poincaré ball embedding
 * @param config - Hyperbolic geometry configuration
 * @returns Euclidean embedding vector (always a newly allocated array)
 */
export function poincareToEuclidean(
  poincare: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const settings = { ...DEFAULT_CONFIG, ...config };
  const rootC = Math.sqrt(Math.abs(settings.curvature));
  const radius = l2Norm(poincare);

  // Points (numerically) at the origin are their own image.
  if (radius < settings.epsilon) {
    return Float32Array.from(poincare);
  }

  const scale = 2 * Math.atanh(rootC * radius) / (rootC * radius);
  return Float32Array.from(poincare, (component) => component * scale);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Geodesic distance between two points of the Poincaré ball.
 *
 * Evaluates, with `c = |curvature|`:
 *   d(a, b) = (1/sqrt(c)) * arcosh(1 + 2c * ||a-b||^2 / ((1 - c*||a||^2)(1 - c*||b||^2)))
 * The denominator is floored at `epsilon` and the arcosh argument at 1.
 *
 * @param a - First Poincaré embedding
 * @param b - Second Poincaré embedding
 * @param config - Hyperbolic geometry configuration
 * @returns Hyperbolic distance
 * @throws Error when `a` and `b` differ in length
 */
export function hyperbolicDistance(
  a: Float32Array | number[],
  b: Float32Array | number[],
  config: HyperbolicConfig = {}
): number {
  const { curvature, epsilon } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);

  if (a.length !== b.length) {
    throw new Error('Embeddings must have same dimension');
  }

  // Single pass accumulating ||a - b||^2, ||a||^2 and ||b||^2.
  let gapSq = 0;
  let aSq = 0;
  let bSq = 0;
  for (let k = 0; k < a.length; k++) {
    const ak = a[k];
    const bk = b[k];
    const delta = ak - bk;
    gapSq += delta * delta;
    aSq += ak * ak;
    bSq += bk * bk;
  }

  const conformal = (1 - c * aSq) * (1 - c * bSq);
  const ratio = (2 * c * gapSq) / Math.max(conformal, epsilon);

  // arcosh is only defined on [1, inf); guard against rounding below 1.
  const coshArg = Math.max(1, 1 + ratio);

  return Math.acosh(coshArg) / Math.sqrt(c);
}
|
|
189
|
+
|
|
190
|
+
/**
 * Möbius addition in Poincaré ball
 *
 * Computes, with `c = |curvature|`:
 *   a ⊕ b = ((1 + 2c<a,b> + c||b||^2) a + (1 - c||a||^2) b)
 *           / (1 + 2c<a,b> + c^2 ||a||^2 ||b||^2)
 * The denominator is floored at `epsilon`. The result is clamped so its norm
 * stays below `maxNorm / sqrt(c)`, i.e. `maxNorm` is applied as a fraction of
 * the ball radius `1 / sqrt(c)` (identical to the raw value when c = 1).
 *
 * @param a - First vector
 * @param b - Second vector
 * @param config - Configuration
 * @returns a ⊕ b in hyperbolic space (always a newly allocated array)
 * @throws Error when `a` and `b` differ in length
 */
export function mobiusAdd(
  a: Float32Array | number[],
  b: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);

  if (a.length !== b.length) {
    throw new Error('Vectors must have same dimension');
  }

  // ||a||^2, ||b||^2 and <a, b> in one pass
  let normASq = 0;
  let normBSq = 0;
  let dotAB = 0;
  for (let i = 0; i < a.length; i++) {
    normASq += a[i] * a[i];
    normBSq += b[i] * b[i];
    dotAB += a[i] * b[i];
  }

  // Möbius addition formula
  const numeratorCoeffA = 1 + 2 * c * dotAB + c * normBSq;
  const numeratorCoeffB = 1 - c * normASq;
  // Loop-invariant, so the epsilon floor is applied once up front.
  const denominator = Math.max(
    1 + 2 * c * dotAB + c * c * normASq * normBSq,
    epsilon
  );

  const result = new Float32Array(a.length);
  for (let i = 0; i < a.length; i++) {
    result[i] = (numeratorCoeffA * a[i] + numeratorCoeffB * b[i]) / denominator;
  }

  // The ball radius is 1/sqrt(c), so the clamp bound must scale with it.
  return clampNorm(result, maxNorm / Math.sqrt(c), epsilon);
}
|
|
237
|
+
|
|
238
|
+
/**
 * Möbius scalar multiplication in Poincaré ball
 *
 * Computes, with `c = |curvature|`:
 *   r ⊗ v = tanh(r * arctanh(sqrt(c) * ||v||)) * v / (sqrt(c) * ||v||)
 * A vector whose norm is below `epsilon` yields the zero vector. The result
 * is clamped so its norm stays below `maxNorm / sqrt(c)`, i.e. `maxNorm` is
 * applied as a fraction of the ball radius `1 / sqrt(c)` (identical to the
 * raw value when c = 1).
 *
 * @param r - Scalar
 * @param v - Vector in Poincaré ball
 * @param config - Configuration
 * @returns r ⊗ v in hyperbolic space (always a newly allocated array)
 */
export function mobiusScalarMul(
  r: number,
  v: Float32Array | number[],
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);
  const sqrtC = Math.sqrt(c);

  const norm = l2Norm(v);

  if (norm < epsilon) {
    return new Float32Array(v.length);
  }

  const factor = Math.tanh(r * Math.atanh(sqrtC * norm)) / (sqrtC * norm);

  const result = new Float32Array(v.length);
  for (let i = 0; i < v.length; i++) {
    result[i] = v[i] * factor;
  }

  // The ball radius is 1/sqrt(c), so the clamp bound must scale with it.
  return clampNorm(result, maxNorm / sqrtC, epsilon);
}
|
|
271
|
+
|
|
272
|
+
/**
 * Compute hyperbolic centroid (Fréchet mean) of multiple points
 *
 * Starts from the coordinate-wise mean of the inputs passed through
 * `euclideanToPoincare`, then repeatedly moves the estimate along the average
 * of the log maps towards every point. Iteration stops after `maxIter` rounds
 * or once the norm of the summed log maps drops below `epsilon`.
 *
 * @param points - Array of Poincaré embeddings (all of the same dimension)
 * @param config - Configuration
 * @param maxIter - Maximum iterations (default: 100)
 * @returns Hyperbolic centroid (always a newly allocated array)
 * @throws Error when `points` is empty or the points differ in dimension
 */
export function hyperbolicCentroid(
  points: Array<Float32Array | number[]>,
  config: HyperbolicConfig = {},
  maxIter = 100
): Float32Array {
  if (points.length === 0) {
    throw new Error('Need at least one point');
  }

  // Ragged input would otherwise silently turn into NaN coordinates.
  const dim = points[0].length;
  for (const p of points) {
    if (p.length !== dim) {
      throw new Error('Points must have same dimension');
    }
  }

  if (points.length === 1) {
    return Float32Array.from(points[0]);
  }

  const { epsilon } = { ...DEFAULT_CONFIG, ...config };

  // Initialize centroid at Euclidean mean projected to ball
  const centroidInit = new Float32Array(dim);
  for (const p of points) {
    for (let i = 0; i < dim; i++) {
      centroidInit[i] += p[i];
    }
  }
  for (let i = 0; i < dim; i++) {
    centroidInit[i] /= points.length;
  }

  // euclideanToPoincare returns a fresh array, so it can be used directly.
  let centroid = euclideanToPoincare(centroidInit, config);

  // Convert plain arrays once instead of on every refinement step.
  const typedPoints = points.map((p) =>
    p instanceof Float32Array ? p : new Float32Array(p)
  );

  // Iterative refinement using Karcher mean algorithm
  for (let iter = 0; iter < maxIter; iter++) {
    const gradient = new Float32Array(dim);

    for (const p of typedPoints) {
      // Log map from centroid to point
      const logMap = logMapAt(centroid, p, config);
      for (let i = 0; i < dim; i++) {
        gradient[i] += logMap[i];
      }
    }

    // Check convergence (on the summed, not yet averaged, log maps)
    const gradNorm = l2Norm(gradient);
    if (gradNorm < epsilon) break;

    // Update centroid using exponential map of the averaged direction
    for (let i = 0; i < dim; i++) {
      gradient[i] /= points.length;
    }
    centroid = expMapAt(centroid, gradient, config);
  }

  return centroid;
}
|
|
348
|
+
|
|
349
|
+
/**
 * Exponential map at point p
 *
 * Computes, with `c = |curvature|` and `lambda_p = 2 / (1 - c * ||p||^2)`:
 *   exp_p(v) = p ⊕ (tanh(sqrt(c) * lambda_p * ||v|| / 2) * v / (sqrt(c) * ||v||))
 * A tangent vector whose norm is below `epsilon` yields a copy of `p`. The
 * result is clamped so its norm stays below `maxNorm / sqrt(c)`, i.e.
 * `maxNorm` is applied as a fraction of the ball radius `1 / sqrt(c)`
 * (identical to the raw value when c = 1).
 *
 * @param p - Base point in the Poincaré ball
 * @param v - Tangent vector at `p`
 * @param config - Configuration
 * @returns Point reached from `p` along `v` (always a newly allocated array)
 */
function expMapAt(
  p: Float32Array,
  v: Float32Array,
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon, maxNorm } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);

  const normV = l2Norm(v);

  if (normV < epsilon) {
    return new Float32Array(p);
  }

  const normP = l2Norm(p);
  const lambdaP = 2 / (1 - c * normP * normP);

  const sqrtC = Math.sqrt(c);
  const tanhArg = sqrtC * lambdaP * normV / 2;
  const coeff = Math.tanh(tanhArg) / (sqrtC * normV);

  const scaledV = new Float32Array(v.length);
  for (let i = 0; i < v.length; i++) {
    scaledV[i] = v[i] * coeff;
  }

  // The ball radius is 1/sqrt(c), so the clamp bound must scale with it.
  return clampNorm(mobiusAdd(p, scaledV, config), maxNorm / sqrtC, epsilon);
}
|
|
379
|
+
|
|
380
|
+
/**
 * Logarithmic map at point p.
 *
 * With `c = |curvature|`, `lambda_p = 2 / (1 - c * ||p||^2)` and
 * `d = (-p) ⊕ q`, returns
 *   (2 / (sqrt(c) * lambda_p)) * atanh(sqrt(c) * ||d||) * d / ||d||.
 * When `||d||` is below `epsilon` the zero vector is returned.
 *
 * @param p - Base point in the Poincaré ball
 * @param q - Target point in the Poincaré ball
 * @param config - Configuration
 * @returns Tangent vector at `p` pointing towards `q`
 */
function logMapAt(
  p: Float32Array,
  q: Float32Array,
  config: HyperbolicConfig = {}
): Float32Array {
  const { curvature, epsilon } = { ...DEFAULT_CONFIG, ...config };
  const c = Math.abs(curvature);
  const sqrtC = Math.sqrt(c);

  // -p ⊕ q
  const mirroredP = p.map((component) => -component);
  const offset = mobiusAdd(mirroredP, q, config);
  const offsetNorm = l2Norm(offset);

  if (offsetNorm < epsilon) {
    return new Float32Array(p.length);
  }

  const baseNorm = l2Norm(p);
  const conformalFactor = 2 / (1 - c * baseNorm * baseNorm);
  const gain =
    (2 / (sqrtC * conformalFactor)) * Math.atanh(sqrtC * offsetNorm) / offsetNorm;

  return offset.map((component) => component * gain);
}
|
|
416
|
+
|
|
417
|
+
/**
 * Apply `euclideanToPoincare` to every embedding of a batch.
 *
 * @param embeddings - Euclidean embedding vectors
 * @param config - Hyperbolic geometry configuration shared by all conversions
 * @returns One Poincaré ball embedding per input, in the same order
 */
export function batchEuclideanToPoincare(
  embeddings: Array<Float32Array | number[]>,
  config: HyperbolicConfig = {}
): Float32Array[] {
  const toBall = (embedding: Float32Array | number[]): Float32Array =>
    euclideanToPoincare(embedding, config);

  return embeddings.map(toBall);
}
|
|
426
|
+
|
|
427
|
+
/**
 * Hyperbolic distance between every unordered pair of embeddings.
 *
 * The output is a flat array of length n * (n - 1) / 2 holding the pairs
 * (i, j) with i < j in row-major order: (0,1), (0,2), ..., (1,2), ...
 *
 * @param embeddings - Poincaré ball embeddings
 * @param config - Hyperbolic geometry configuration
 * @returns Flat array of pairwise distances
 */
export function pairwiseHyperbolicDistances(
  embeddings: Float32Array[],
  config: HyperbolicConfig = {}
): Float32Array {
  const count = embeddings.length;
  const condensed = new Float32Array((count * (count - 1)) / 2);

  let slot = 0;
  for (let row = 0; row + 1 < count; row++) {
    const rowEmbedding = embeddings[row];
    for (let col = row + 1; col < count; col++) {
      condensed[slot] = hyperbolicDistance(rowEmbedding, embeddings[col], config);
      slot += 1;
    }
  }

  return condensed;
}
|
|
446
|
+
|
|
447
|
+
/**
 * Test whether a vector lies strictly inside the Poincaré ball.
 *
 * The ball radius is `1 / sqrt(|curvature|)`; points exactly on the boundary
 * are reported as outside.
 *
 * @param v - Vector to test
 * @param config - Hyperbolic geometry configuration (only `curvature` is read)
 * @returns `true` when the L2 norm of `v` is below the ball radius
 */
export function isInPoincareBall(
  v: Float32Array | number[],
  config: HyperbolicConfig = {}
): boolean {
  const settings = { ...DEFAULT_CONFIG, ...config };
  const ballRadius = 1 / Math.sqrt(Math.abs(settings.curvature));
  return l2Norm(v) < ballRadius;
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* V3 Embedding Service Module
|
|
3
|
+
*
|
|
4
|
+
* Production embedding service aligned with @sparkleideas/agentic-flow@alpha:
|
|
5
|
+
* - OpenAI provider (text-embedding-3-small/large)
|
|
6
|
+
* - Transformers.js provider (local ONNX models)
|
|
7
|
+
* - Agentic-flow provider (optimized ONNX with SIMD)
|
|
8
|
+
* - Mock provider (development/testing)
|
|
9
|
+
*
|
|
10
|
+
* Additional features:
|
|
11
|
+
* - Persistent SQLite cache
|
|
12
|
+
* - Document chunking with overlap
|
|
13
|
+
* - L2/L1/minmax/zscore normalization
|
|
14
|
+
* - Hyperbolic embeddings (Poincaré ball)
|
|
15
|
+
* - Neural substrate integration (drift, memory, swarm)
|
|
16
|
+
*
|
|
17
|
+
* @module @sparkleideas/embeddings
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
export * from './types.js';
|
|
21
|
+
export * from './embedding-service.js';
|
|
22
|
+
|
|
23
|
+
// Re-export commonly used items at top level
|
|
24
|
+
export {
|
|
25
|
+
createEmbeddingService,
|
|
26
|
+
createEmbeddingServiceAsync,
|
|
27
|
+
getEmbedding,
|
|
28
|
+
cosineSimilarity,
|
|
29
|
+
euclideanDistance,
|
|
30
|
+
dotProduct,
|
|
31
|
+
computeSimilarity,
|
|
32
|
+
OpenAIEmbeddingService,
|
|
33
|
+
TransformersEmbeddingService,
|
|
34
|
+
MockEmbeddingService,
|
|
35
|
+
AgenticFlowEmbeddingService,
|
|
36
|
+
} from './embedding-service.js';
|
|
37
|
+
|
|
38
|
+
export type { AutoEmbeddingConfig } from './embedding-service.js';
|
|
39
|
+
|
|
40
|
+
// Chunking utilities
|
|
41
|
+
export {
|
|
42
|
+
chunkText,
|
|
43
|
+
estimateTokens,
|
|
44
|
+
reconstructFromChunks,
|
|
45
|
+
type ChunkingConfig,
|
|
46
|
+
type Chunk,
|
|
47
|
+
type ChunkedDocument,
|
|
48
|
+
} from './chunking.js';
|
|
49
|
+
|
|
50
|
+
// Normalization utilities
|
|
51
|
+
export {
|
|
52
|
+
l2Normalize,
|
|
53
|
+
l2NormalizeInPlace,
|
|
54
|
+
l1Normalize,
|
|
55
|
+
minMaxNormalize,
|
|
56
|
+
zScoreNormalize,
|
|
57
|
+
normalize,
|
|
58
|
+
normalizeBatch,
|
|
59
|
+
l2Norm,
|
|
60
|
+
isNormalized,
|
|
61
|
+
centerEmbeddings,
|
|
62
|
+
type NormalizationOptions,
|
|
63
|
+
} from './normalization.js';
|
|
64
|
+
|
|
65
|
+
// Hyperbolic embeddings (Poincaré ball)
|
|
66
|
+
export {
|
|
67
|
+
euclideanToPoincare,
|
|
68
|
+
poincareToEuclidean,
|
|
69
|
+
hyperbolicDistance,
|
|
70
|
+
mobiusAdd,
|
|
71
|
+
mobiusScalarMul,
|
|
72
|
+
hyperbolicCentroid,
|
|
73
|
+
batchEuclideanToPoincare,
|
|
74
|
+
pairwiseHyperbolicDistances,
|
|
75
|
+
isInPoincareBall,
|
|
76
|
+
type HyperbolicConfig,
|
|
77
|
+
} from './hyperbolic.js';
|
|
78
|
+
|
|
79
|
+
// Persistent cache
|
|
80
|
+
export {
|
|
81
|
+
PersistentEmbeddingCache,
|
|
82
|
+
isPersistentCacheAvailable,
|
|
83
|
+
type PersistentCacheConfig as DiskCacheConfig,
|
|
84
|
+
type PersistentCacheStats,
|
|
85
|
+
} from './persistent-cache.js';
|
|
86
|
+
|
|
87
|
+
// Neural substrate integration
|
|
88
|
+
export {
|
|
89
|
+
NeuralEmbeddingService,
|
|
90
|
+
createNeuralService,
|
|
91
|
+
isNeuralAvailable,
|
|
92
|
+
listEmbeddingModels,
|
|
93
|
+
downloadEmbeddingModel,
|
|
94
|
+
type DriftResult,
|
|
95
|
+
type MemoryEntry,
|
|
96
|
+
type AgentState,
|
|
97
|
+
type CoherenceResult,
|
|
98
|
+
type SubstrateHealth,
|
|
99
|
+
type NeuralSubstrateConfig,
|
|
100
|
+
} from './neural-integration.js';
|
|
101
|
+
|
|
102
|
+
export type {
|
|
103
|
+
EmbeddingProvider,
|
|
104
|
+
EmbeddingConfig,
|
|
105
|
+
OpenAIEmbeddingConfig,
|
|
106
|
+
TransformersEmbeddingConfig,
|
|
107
|
+
MockEmbeddingConfig,
|
|
108
|
+
AgenticFlowEmbeddingConfig,
|
|
109
|
+
EmbeddingResult,
|
|
110
|
+
BatchEmbeddingResult,
|
|
111
|
+
IEmbeddingService,
|
|
112
|
+
SimilarityMetric,
|
|
113
|
+
SimilarityResult,
|
|
114
|
+
NormalizationType,
|
|
115
|
+
PersistentCacheConfig,
|
|
116
|
+
} from './types.js';
|