elid 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -6
- package/elid.d.ts +485 -83
- package/elid_bg.js +770 -172
- package/elid_bg.wasm +0 -0
- package/package.json +5 -5
package/elid_bg.js
CHANGED
|
@@ -180,92 +180,6 @@ function handleError(f, args) {
|
|
|
180
180
|
wasm.__wbindgen_exn_store(idx);
|
|
181
181
|
}
|
|
182
182
|
}
|
|
183
|
-
/**
|
|
184
|
-
* Compute the normalized SimHash similarity between two strings.
|
|
185
|
-
*
|
|
186
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
187
|
-
*
|
|
188
|
-
* # JavaScript Example
|
|
189
|
-
*
|
|
190
|
-
* ```javascript
|
|
191
|
-
* import { simhashSimilarity } from 'elid';
|
|
192
|
-
*
|
|
193
|
-
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
194
|
-
* console.log(similarity); // ~0.9 (very similar)
|
|
195
|
-
*
|
|
196
|
-
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
197
|
-
* console.log(similarity2); // ~0.4 (different)
|
|
198
|
-
* ```
|
|
199
|
-
* @param {string} a
|
|
200
|
-
* @param {string} b
|
|
201
|
-
* @returns {number}
|
|
202
|
-
*/
|
|
203
|
-
export function simhashSimilarity(a, b) {
|
|
204
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
205
|
-
const len0 = WASM_VECTOR_LEN;
|
|
206
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
207
|
-
const len1 = WASM_VECTOR_LEN;
|
|
208
|
-
const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
|
|
209
|
-
return ret;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
/**
|
|
213
|
-
* Compute the Jaro-Winkler similarity between two strings.
|
|
214
|
-
*
|
|
215
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
216
|
-
* Gives more favorable ratings to strings with common prefixes.
|
|
217
|
-
*
|
|
218
|
-
* # JavaScript Example
|
|
219
|
-
*
|
|
220
|
-
* ```javascript
|
|
221
|
-
* import { jaroWinkler } from 'elid';
|
|
222
|
-
*
|
|
223
|
-
* const similarity = jaroWinkler("martha", "marhta");
|
|
224
|
-
* console.log(similarity); // ~0.961
|
|
225
|
-
* ```
|
|
226
|
-
* @param {string} a
|
|
227
|
-
* @param {string} b
|
|
228
|
-
* @returns {number}
|
|
229
|
-
*/
|
|
230
|
-
export function jaroWinkler(a, b) {
|
|
231
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
232
|
-
const len0 = WASM_VECTOR_LEN;
|
|
233
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
234
|
-
const len1 = WASM_VECTOR_LEN;
|
|
235
|
-
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
236
|
-
return ret;
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
/**
|
|
240
|
-
* Compute the Hamming distance between two strings.
|
|
241
|
-
*
|
|
242
|
-
* Returns the number of positions at which the characters differ.
|
|
243
|
-
* Returns null if strings have different lengths.
|
|
244
|
-
*
|
|
245
|
-
* # JavaScript Example
|
|
246
|
-
*
|
|
247
|
-
* ```javascript
|
|
248
|
-
* import { hamming } from 'elid';
|
|
249
|
-
*
|
|
250
|
-
* const distance = hamming("karolin", "kathrin");
|
|
251
|
-
* console.log(distance); // 3
|
|
252
|
-
*
|
|
253
|
-
* const invalid = hamming("hello", "world!");
|
|
254
|
-
* console.log(invalid); // null
|
|
255
|
-
* ```
|
|
256
|
-
* @param {string} a
|
|
257
|
-
* @param {string} b
|
|
258
|
-
* @returns {number | undefined}
|
|
259
|
-
*/
|
|
260
|
-
export function hamming(a, b) {
|
|
261
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
262
|
-
const len0 = WASM_VECTOR_LEN;
|
|
263
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
264
|
-
const len1 = WASM_VECTOR_LEN;
|
|
265
|
-
const ret = wasm.hamming(ptr0, len0, ptr1, len1);
|
|
266
|
-
return ret === 0x100000001 ? undefined : ret;
|
|
267
|
-
}
|
|
268
|
-
|
|
269
183
|
/**
|
|
270
184
|
* Compute the SimHash fingerprint of a string.
|
|
271
185
|
*
|
|
@@ -297,30 +211,41 @@ export function simhash(text) {
|
|
|
297
211
|
return ret;
|
|
298
212
|
}
|
|
299
213
|
|
|
214
|
+
function passArrayJsValueToWasm0(array, malloc) {
|
|
215
|
+
const ptr = malloc(array.length * 4, 4) >>> 0;
|
|
216
|
+
for (let i = 0; i < array.length; i++) {
|
|
217
|
+
const add = addToExternrefTable0(array[i]);
|
|
218
|
+
getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
|
|
219
|
+
}
|
|
220
|
+
WASM_VECTOR_LEN = array.length;
|
|
221
|
+
return ptr;
|
|
222
|
+
}
|
|
300
223
|
/**
|
|
301
|
-
*
|
|
224
|
+
* Find all matches above a threshold score.
|
|
302
225
|
*
|
|
303
|
-
* Returns
|
|
226
|
+
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
304
227
|
*
|
|
305
228
|
* # JavaScript Example
|
|
306
229
|
*
|
|
307
230
|
* ```javascript
|
|
308
|
-
* import {
|
|
231
|
+
* import { findMatchesAboveThreshold } from 'elid';
|
|
309
232
|
*
|
|
310
|
-
* const
|
|
311
|
-
*
|
|
233
|
+
* const candidates = ["apple", "application", "apply", "banana"];
|
|
234
|
+
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
235
|
+
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
312
236
|
* ```
|
|
313
|
-
* @param {string}
|
|
314
|
-
* @param {string}
|
|
315
|
-
* @
|
|
237
|
+
* @param {string} query
|
|
238
|
+
* @param {string[]} candidates
|
|
239
|
+
* @param {number} threshold
|
|
240
|
+
* @returns {any}
|
|
316
241
|
*/
|
|
317
|
-
export function
|
|
318
|
-
const ptr0 = passStringToWasm0(
|
|
242
|
+
export function findMatchesAboveThreshold(query, candidates, threshold) {
|
|
243
|
+
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
319
244
|
const len0 = WASM_VECTOR_LEN;
|
|
320
|
-
const ptr1 =
|
|
245
|
+
const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
|
|
321
246
|
const len1 = WASM_VECTOR_LEN;
|
|
322
|
-
const ret = wasm.
|
|
323
|
-
return ret
|
|
247
|
+
const ret = wasm.findMatchesAboveThreshold(ptr0, len0, ptr1, len1, threshold);
|
|
248
|
+
return ret;
|
|
324
249
|
}
|
|
325
250
|
|
|
326
251
|
let cachedFloat64ArrayMemory0 = null;
|
|
@@ -339,6 +264,69 @@ function passArrayF64ToWasm0(arg, malloc) {
|
|
|
339
264
|
return ptr;
|
|
340
265
|
}
|
|
341
266
|
|
|
267
|
+
function takeFromExternrefTable0(idx) {
|
|
268
|
+
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
269
|
+
wasm.__externref_table_dealloc(idx);
|
|
270
|
+
return value;
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
274
|
+
*
|
|
275
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
276
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
277
|
+
*
|
|
278
|
+
* # Parameters
|
|
279
|
+
*
|
|
280
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
281
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
282
|
+
*
|
|
283
|
+
* # Returns
|
|
284
|
+
*
|
|
285
|
+
* A base32hex-encoded ELID string.
|
|
286
|
+
*
|
|
287
|
+
* # JavaScript Example
|
|
288
|
+
*
|
|
289
|
+
* ```javascript
|
|
290
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
291
|
+
*
|
|
292
|
+
* // Different sized embeddings from different models
|
|
293
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
294
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
295
|
+
*
|
|
296
|
+
* // Project both to 128-dim common space
|
|
297
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
298
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
299
|
+
*
|
|
300
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
301
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
302
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
303
|
+
* // Both have length 128
|
|
304
|
+
* ```
|
|
305
|
+
* @param {Float64Array} embedding
|
|
306
|
+
* @param {number} common_dims
|
|
307
|
+
* @returns {string}
|
|
308
|
+
*/
|
|
309
|
+
export function encodeElidCrossDimensional(embedding, common_dims) {
|
|
310
|
+
let deferred3_0;
|
|
311
|
+
let deferred3_1;
|
|
312
|
+
try {
|
|
313
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
314
|
+
const len0 = WASM_VECTOR_LEN;
|
|
315
|
+
const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
|
|
316
|
+
var ptr2 = ret[0];
|
|
317
|
+
var len2 = ret[1];
|
|
318
|
+
if (ret[3]) {
|
|
319
|
+
ptr2 = 0; len2 = 0;
|
|
320
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
321
|
+
}
|
|
322
|
+
deferred3_0 = ptr2;
|
|
323
|
+
deferred3_1 = len2;
|
|
324
|
+
return getStringFromWasm0(ptr2, len2);
|
|
325
|
+
} finally {
|
|
326
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
342
330
|
let cachedUint32ArrayMemory0 = null;
|
|
343
331
|
|
|
344
332
|
function getUint32ArrayMemory0() {
|
|
@@ -384,93 +372,409 @@ export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
|
|
|
384
372
|
return v2;
|
|
385
373
|
}
|
|
386
374
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
375
|
+
/**
|
|
376
|
+
* Compute the Jaro similarity between two strings.
|
|
377
|
+
*
|
|
378
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
379
|
+
* Particularly effective for short strings like names.
|
|
380
|
+
*
|
|
381
|
+
* # JavaScript Example
|
|
382
|
+
*
|
|
383
|
+
* ```javascript
|
|
384
|
+
* import { jaro } from 'elid';
|
|
385
|
+
*
|
|
386
|
+
* const similarity = jaro("martha", "marhta");
|
|
387
|
+
* console.log(similarity); // ~0.944
|
|
388
|
+
* ```
|
|
389
|
+
* @param {string} a
|
|
390
|
+
* @param {string} b
|
|
391
|
+
* @returns {number}
|
|
392
|
+
*/
|
|
393
|
+
export function jaro(a, b) {
|
|
394
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
395
|
+
const len0 = WASM_VECTOR_LEN;
|
|
396
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
397
|
+
const len1 = WASM_VECTOR_LEN;
|
|
398
|
+
const ret = wasm.jaro(ptr0, len0, ptr1, len1);
|
|
399
|
+
return ret;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
/**
|
|
403
|
+
* Compute the best matching similarity between two strings.
|
|
404
|
+
*
|
|
405
|
+
* Runs multiple algorithms and returns the highest score.
|
|
406
|
+
*
|
|
407
|
+
* # JavaScript Example
|
|
408
|
+
*
|
|
409
|
+
* ```javascript
|
|
410
|
+
* import { bestMatch } from 'elid';
|
|
411
|
+
*
|
|
412
|
+
* const score = bestMatch("hello", "hallo");
|
|
413
|
+
* console.log(score); // ~0.8
|
|
414
|
+
* ```
|
|
415
|
+
* @param {string} a
|
|
416
|
+
* @param {string} b
|
|
417
|
+
* @returns {number}
|
|
418
|
+
*/
|
|
419
|
+
export function bestMatch(a, b) {
|
|
420
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
421
|
+
const len0 = WASM_VECTOR_LEN;
|
|
422
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
423
|
+
const len1 = WASM_VECTOR_LEN;
|
|
424
|
+
const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
|
|
425
|
+
return ret;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
/**
|
|
429
|
+
* Encode an embedding vector to an ELID string.
|
|
430
|
+
*
|
|
431
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
432
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
433
|
+
* similarity search.
|
|
434
|
+
*
|
|
435
|
+
* # Parameters
|
|
436
|
+
*
|
|
437
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
438
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
439
|
+
*
|
|
440
|
+
* # Returns
|
|
441
|
+
*
|
|
442
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
443
|
+
*
|
|
444
|
+
* # JavaScript Example
|
|
445
|
+
*
|
|
446
|
+
* ```javascript
|
|
447
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
448
|
+
*
|
|
449
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
450
|
+
* const embedding = await getEmbedding("Hello world");
|
|
451
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
452
|
+
* console.log(elid); // "012345abcdef..."
|
|
453
|
+
* ```
|
|
454
|
+
* @param {Float64Array} embedding
|
|
455
|
+
* @param {ElidProfile} profile
|
|
456
|
+
* @returns {string}
|
|
457
|
+
*/
|
|
458
|
+
export function encodeElid(embedding, profile) {
|
|
459
|
+
let deferred3_0;
|
|
460
|
+
let deferred3_1;
|
|
461
|
+
try {
|
|
462
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
463
|
+
const len0 = WASM_VECTOR_LEN;
|
|
464
|
+
const ret = wasm.encodeElid(ptr0, len0, profile);
|
|
465
|
+
var ptr2 = ret[0];
|
|
466
|
+
var len2 = ret[1];
|
|
467
|
+
if (ret[3]) {
|
|
468
|
+
ptr2 = 0; len2 = 0;
|
|
469
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
470
|
+
}
|
|
471
|
+
deferred3_0 = ptr2;
|
|
472
|
+
deferred3_1 = len2;
|
|
473
|
+
return getStringFromWasm0(ptr2, len2);
|
|
474
|
+
} finally {
|
|
475
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
392
476
|
}
|
|
393
|
-
WASM_VECTOR_LEN = array.length;
|
|
394
|
-
return ptr;
|
|
395
477
|
}
|
|
478
|
+
|
|
396
479
|
/**
|
|
397
|
-
*
|
|
480
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
481
|
+
*
|
|
482
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
483
|
+
*
|
|
484
|
+
* # JavaScript Example
|
|
485
|
+
*
|
|
486
|
+
* ```javascript
|
|
487
|
+
* import { simhashSimilarity } from 'elid';
|
|
488
|
+
*
|
|
489
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
490
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
491
|
+
*
|
|
492
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
493
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
494
|
+
* ```
|
|
495
|
+
* @param {string} a
|
|
496
|
+
* @param {string} b
|
|
497
|
+
* @returns {number}
|
|
498
|
+
*/
|
|
499
|
+
export function simhashSimilarity(a, b) {
|
|
500
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
501
|
+
const len0 = WASM_VECTOR_LEN;
|
|
502
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
503
|
+
const len1 = WASM_VECTOR_LEN;
|
|
504
|
+
const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
|
|
505
|
+
return ret;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Encode an embedding using lossless full vector encoding.
|
|
510
|
+
*
|
|
511
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
512
|
+
* This produces the largest output but allows exact reconstruction.
|
|
513
|
+
*
|
|
514
|
+
* # Parameters
|
|
515
|
+
*
|
|
516
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
517
|
+
*
|
|
518
|
+
* # Returns
|
|
519
|
+
*
|
|
520
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
521
|
+
*
|
|
522
|
+
* # JavaScript Example
|
|
523
|
+
*
|
|
524
|
+
* ```javascript
|
|
525
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
526
|
+
*
|
|
527
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
528
|
+
* const elid = encodeElidLossless(embedding);
|
|
529
|
+
*
|
|
530
|
+
* // Later, recover the exact embedding
|
|
531
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
532
|
+
* // recovered is identical to embedding
|
|
533
|
+
* ```
|
|
534
|
+
* @param {Float64Array} embedding
|
|
535
|
+
* @returns {string}
|
|
536
|
+
*/
|
|
537
|
+
export function encodeElidLossless(embedding) {
|
|
538
|
+
let deferred3_0;
|
|
539
|
+
let deferred3_1;
|
|
540
|
+
try {
|
|
541
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
542
|
+
const len0 = WASM_VECTOR_LEN;
|
|
543
|
+
const ret = wasm.encodeElidLossless(ptr0, len0);
|
|
544
|
+
var ptr2 = ret[0];
|
|
545
|
+
var len2 = ret[1];
|
|
546
|
+
if (ret[3]) {
|
|
547
|
+
ptr2 = 0; len2 = 0;
|
|
548
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
549
|
+
}
|
|
550
|
+
deferred3_0 = ptr2;
|
|
551
|
+
deferred3_1 = len2;
|
|
552
|
+
return getStringFromWasm0(ptr2, len2);
|
|
553
|
+
} finally {
|
|
554
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
/**
|
|
559
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
560
|
+
*
|
|
561
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
562
|
+
*
|
|
563
|
+
* # JavaScript Example
|
|
564
|
+
*
|
|
565
|
+
* ```javascript
|
|
566
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
567
|
+
*
|
|
568
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
569
|
+
* console.log(similarity); // ~0.8
|
|
570
|
+
* ```
|
|
571
|
+
* @param {string} a
|
|
572
|
+
* @param {string} b
|
|
573
|
+
* @returns {number}
|
|
574
|
+
*/
|
|
575
|
+
export function normalizedLevenshtein(a, b) {
|
|
576
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
577
|
+
const len0 = WASM_VECTOR_LEN;
|
|
578
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
579
|
+
const len1 = WASM_VECTOR_LEN;
|
|
580
|
+
const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
|
|
581
|
+
return ret;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
/**
|
|
585
|
+
* Decode an ELID string back to an embedding vector.
|
|
586
|
+
*
|
|
587
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
588
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
589
|
+
* like Mini128, Morton, or Hilbert.
|
|
590
|
+
*
|
|
591
|
+
* # Parameters
|
|
592
|
+
*
|
|
593
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
594
|
+
*
|
|
595
|
+
* # Returns
|
|
596
|
+
*
|
|
597
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
598
|
+
* is not reversible.
|
|
599
|
+
*
|
|
600
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
601
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
602
|
+
*
|
|
603
|
+
* # JavaScript Example
|
|
604
|
+
*
|
|
605
|
+
* ```javascript
|
|
606
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
607
|
+
*
|
|
608
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
609
|
+
* const elid = encodeElidLossless(embedding);
|
|
610
|
+
*
|
|
611
|
+
* if (isElidReversible(elid)) {
|
|
612
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
613
|
+
* console.log(recovered.length); // 768
|
|
614
|
+
* }
|
|
615
|
+
* ```
|
|
616
|
+
* @param {string} elid_str
|
|
617
|
+
* @returns {any}
|
|
618
|
+
*/
|
|
619
|
+
export function decodeElidToEmbedding(elid_str) {
|
|
620
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
621
|
+
const len0 = WASM_VECTOR_LEN;
|
|
622
|
+
const ret = wasm.decodeElidToEmbedding(ptr0, len0);
|
|
623
|
+
if (ret[2]) {
|
|
624
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
625
|
+
}
|
|
626
|
+
return takeFromExternrefTable0(ret[0]);
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
/**
|
|
630
|
+
* Get metadata about a FullVector ELID.
|
|
631
|
+
*
|
|
632
|
+
* Returns an object containing information about how the ELID was encoded,
|
|
633
|
+
* including original dimensions, precision, and dimension mode.
|
|
634
|
+
*
|
|
635
|
+
* # Parameters
|
|
636
|
+
*
|
|
637
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
638
|
+
*
|
|
639
|
+
* # Returns
|
|
640
|
+
*
|
|
641
|
+
* An object with metadata fields, or null if not a FullVector ELID.
|
|
642
|
+
*
|
|
643
|
+
* # JavaScript Example
|
|
644
|
+
*
|
|
645
|
+
* ```javascript
|
|
646
|
+
* import { encodeElidCompressed, getElidMetadata } from 'elid';
|
|
647
|
+
*
|
|
648
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
649
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
650
|
+
*
|
|
651
|
+
* const meta = getElidMetadata(elid);
|
|
652
|
+
* if (meta) {
|
|
653
|
+
* console.log(meta.originalDims); // 768
|
|
654
|
+
* console.log(meta.encodedDims); // depends on compression
|
|
655
|
+
* console.log(meta.isLossless); // false
|
|
656
|
+
* }
|
|
657
|
+
* ```
|
|
658
|
+
* @param {string} elid_str
|
|
659
|
+
* @returns {any}
|
|
660
|
+
*/
|
|
661
|
+
export function getElidMetadata(elid_str) {
|
|
662
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
663
|
+
const len0 = WASM_VECTOR_LEN;
|
|
664
|
+
const ret = wasm.getElidMetadata(ptr0, len0);
|
|
665
|
+
if (ret[2]) {
|
|
666
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
667
|
+
}
|
|
668
|
+
return takeFromExternrefTable0(ret[0]);
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
function getArrayU8FromWasm0(ptr, len) {
|
|
672
|
+
ptr = ptr >>> 0;
|
|
673
|
+
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Decode an ELID string to raw bytes.
|
|
677
|
+
*
|
|
678
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
679
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
680
|
+
*
|
|
681
|
+
* # Parameters
|
|
682
|
+
*
|
|
683
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
398
684
|
*
|
|
399
|
-
* Returns
|
|
685
|
+
* # Returns
|
|
686
|
+
*
|
|
687
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
400
688
|
*
|
|
401
689
|
* # JavaScript Example
|
|
402
690
|
*
|
|
403
691
|
* ```javascript
|
|
404
|
-
* import {
|
|
692
|
+
* import { decodeElid } from 'elid';
|
|
405
693
|
*
|
|
406
|
-
* const
|
|
407
|
-
*
|
|
408
|
-
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
694
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
695
|
+
* console.log(bytes); // Uint8Array [...]
|
|
409
696
|
* ```
|
|
410
|
-
* @param {string}
|
|
411
|
-
* @
|
|
412
|
-
* @param {number} threshold
|
|
413
|
-
* @returns {any}
|
|
697
|
+
* @param {string} elid_str
|
|
698
|
+
* @returns {Uint8Array}
|
|
414
699
|
*/
|
|
415
|
-
export function
|
|
416
|
-
const ptr0 = passStringToWasm0(
|
|
700
|
+
export function decodeElid(elid_str) {
|
|
701
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
417
702
|
const len0 = WASM_VECTOR_LEN;
|
|
418
|
-
const
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
703
|
+
const ret = wasm.decodeElid(ptr0, len0);
|
|
704
|
+
if (ret[3]) {
|
|
705
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
706
|
+
}
|
|
707
|
+
var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
|
|
708
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
709
|
+
return v2;
|
|
422
710
|
}
|
|
423
711
|
|
|
424
712
|
/**
|
|
425
|
-
* Compute the
|
|
713
|
+
* Compute the Levenshtein distance between two strings.
|
|
426
714
|
*
|
|
427
|
-
*
|
|
715
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
428
716
|
*
|
|
429
717
|
* # JavaScript Example
|
|
430
718
|
*
|
|
431
719
|
* ```javascript
|
|
432
|
-
* import {
|
|
720
|
+
* import { levenshtein } from 'elid';
|
|
433
721
|
*
|
|
434
|
-
* const distance =
|
|
435
|
-
* console.log(distance); //
|
|
722
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
723
|
+
* console.log(distance); // 3
|
|
436
724
|
* ```
|
|
437
725
|
* @param {string} a
|
|
438
726
|
* @param {string} b
|
|
439
727
|
* @returns {number}
|
|
440
728
|
*/
|
|
441
|
-
export function
|
|
729
|
+
export function levenshtein(a, b) {
|
|
442
730
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
443
731
|
const len0 = WASM_VECTOR_LEN;
|
|
444
732
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
445
733
|
const len1 = WASM_VECTOR_LEN;
|
|
446
|
-
const ret = wasm.
|
|
734
|
+
const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
|
|
447
735
|
return ret >>> 0;
|
|
448
736
|
}
|
|
449
737
|
|
|
450
738
|
/**
|
|
451
|
-
*
|
|
739
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
452
740
|
*
|
|
453
|
-
* Returns
|
|
741
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
742
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
743
|
+
*
|
|
744
|
+
* # Parameters
|
|
745
|
+
*
|
|
746
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
747
|
+
*
|
|
748
|
+
* # Returns
|
|
749
|
+
*
|
|
750
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
454
751
|
*
|
|
455
752
|
* # JavaScript Example
|
|
456
753
|
*
|
|
457
754
|
* ```javascript
|
|
458
|
-
* import {
|
|
755
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
459
756
|
*
|
|
460
|
-
* const
|
|
461
|
-
*
|
|
757
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
758
|
+
*
|
|
759
|
+
* // Mini128 is NOT reversible
|
|
760
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
761
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
762
|
+
*
|
|
763
|
+
* // Lossless IS reversible
|
|
764
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
765
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
462
766
|
* ```
|
|
463
|
-
* @param {string}
|
|
464
|
-
* @
|
|
465
|
-
* @returns {number}
|
|
767
|
+
* @param {string} elid_str
|
|
768
|
+
* @returns {boolean}
|
|
466
769
|
*/
|
|
467
|
-
export function
|
|
468
|
-
const ptr0 = passStringToWasm0(
|
|
770
|
+
export function isElidReversible(elid_str) {
|
|
771
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
469
772
|
const len0 = WASM_VECTOR_LEN;
|
|
470
|
-
const
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
773
|
+
const ret = wasm.isElidReversible(ptr0, len0);
|
|
774
|
+
if (ret[2]) {
|
|
775
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
776
|
+
}
|
|
777
|
+
return ret[0] !== 0;
|
|
474
778
|
}
|
|
475
779
|
|
|
476
780
|
function _assertClass(instance, klass) {
|
|
@@ -510,29 +814,105 @@ export function levenshteinWithOpts(a, b, opts) {
|
|
|
510
814
|
}
|
|
511
815
|
|
|
512
816
|
/**
|
|
513
|
-
* Compute the
|
|
817
|
+
* Compute the Hamming distance between two ELID strings.
|
|
514
818
|
*
|
|
515
|
-
*
|
|
819
|
+
* Returns the number of differing bits between two Mini128 ELIDs.
|
|
820
|
+
* This distance is proportional to the angular distance between the
|
|
821
|
+
* original embeddings (lower = more similar).
|
|
822
|
+
*
|
|
823
|
+
* # Requirements
|
|
824
|
+
*
|
|
825
|
+
* Both ELIDs must use the Mini128 profile.
|
|
826
|
+
*
|
|
827
|
+
* # Parameters
|
|
828
|
+
*
|
|
829
|
+
* - `elid1`: First ELID string
|
|
830
|
+
* - `elid2`: Second ELID string
|
|
831
|
+
*
|
|
832
|
+
* # Returns
|
|
833
|
+
*
|
|
834
|
+
* Hamming distance (0-128). 0 means identical, 128 means completely different.
|
|
516
835
|
*
|
|
517
836
|
* # JavaScript Example
|
|
518
837
|
*
|
|
519
838
|
* ```javascript
|
|
520
|
-
* import {
|
|
839
|
+
* import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
|
|
521
840
|
*
|
|
522
|
-
* const
|
|
523
|
-
*
|
|
841
|
+
* const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
|
|
842
|
+
* const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
|
|
843
|
+
*
|
|
844
|
+
* const distance = elidHammingDistance(elid1, elid2);
|
|
845
|
+
* if (distance < 20) {
|
|
846
|
+
* console.log("Very similar embeddings!");
|
|
847
|
+
* }
|
|
848
|
+
* ```
|
|
849
|
+
* @param {string} elid1
|
|
850
|
+
* @param {string} elid2
|
|
851
|
+
* @returns {number}
|
|
852
|
+
*/
|
|
853
|
+
export function elidHammingDistance(elid1, elid2) {
|
|
854
|
+
const ptr0 = passStringToWasm0(elid1, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
855
|
+
const len0 = WASM_VECTOR_LEN;
|
|
856
|
+
const ptr1 = passStringToWasm0(elid2, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
857
|
+
const len1 = WASM_VECTOR_LEN;
|
|
858
|
+
const ret = wasm.elidHammingDistance(ptr0, len0, ptr1, len1);
|
|
859
|
+
if (ret[2]) {
|
|
860
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
861
|
+
}
|
|
862
|
+
return ret[0] >>> 0;
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
/**
|
|
866
|
+
* Find the best match for a query string in an array of candidates.
|
|
867
|
+
*
|
|
868
|
+
* Returns an object with the index and similarity score of the best match.
|
|
869
|
+
*
|
|
870
|
+
* # JavaScript Example
|
|
871
|
+
*
|
|
872
|
+
* ```javascript
|
|
873
|
+
* import { findBestMatch } from 'elid';
|
|
874
|
+
*
|
|
875
|
+
* const candidates = ["apple", "application", "apply"];
|
|
876
|
+
* const result = findBestMatch("app", candidates);
|
|
877
|
+
* console.log(result); // { index: 0, score: 0.907 }
|
|
878
|
+
* ```
|
|
879
|
+
* @param {string} query
|
|
880
|
+
* @param {string[]} candidates
|
|
881
|
+
* @returns {object}
|
|
882
|
+
*/
|
|
883
|
+
export function findBestMatch(query, candidates) {
|
|
884
|
+
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
885
|
+
const len0 = WASM_VECTOR_LEN;
|
|
886
|
+
const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
|
|
887
|
+
const len1 = WASM_VECTOR_LEN;
|
|
888
|
+
const ret = wasm.findBestMatch(ptr0, len0, ptr1, len1);
|
|
889
|
+
return ret;
|
|
890
|
+
}
|
|
891
|
+
|
|
892
|
+
/**
|
|
893
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
894
|
+
*
|
|
895
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
896
|
+
*
|
|
897
|
+
* # JavaScript Example
|
|
898
|
+
*
|
|
899
|
+
* ```javascript
|
|
900
|
+
* import { osaDistance } from 'elid';
|
|
901
|
+
*
|
|
902
|
+
* const distance = osaDistance("ca", "ac");
|
|
903
|
+
* console.log(distance); // 1 (transposition)
|
|
524
904
|
* ```
|
|
525
905
|
* @param {string} a
|
|
526
906
|
* @param {string} b
|
|
527
907
|
* @returns {number}
|
|
528
908
|
*/
|
|
529
|
-
export function
|
|
909
|
+
export function osaDistance(a, b) {
|
|
530
910
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
531
911
|
const len0 = WASM_VECTOR_LEN;
|
|
532
912
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
533
913
|
const len1 = WASM_VECTOR_LEN;
|
|
534
|
-
const ret = wasm.
|
|
535
|
-
return ret;
|
|
914
|
+
const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
|
|
915
|
+
return ret >>> 0;
|
|
536
916
|
}
|
|
537
917
|
|
|
538
918
|
/**
|
|
@@ -561,59 +941,268 @@ export function simhashDistance(hash1, hash2) {
|
|
|
561
941
|
}
|
|
562
942
|
|
|
563
943
|
/**
|
|
564
|
-
*
|
|
944
|
+
* Encode an embedding with percentage-based compression.
|
|
945
|
+
*
|
|
946
|
+
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
947
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
948
|
+
* - 0.5 = half precision and/or half dimensions
|
|
949
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
950
|
+
*
|
|
951
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
952
|
+
* more geometric relationships) before reducing precision.
|
|
953
|
+
*
|
|
954
|
+
* # Parameters
|
|
955
|
+
*
|
|
956
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
957
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
958
|
+
*
|
|
959
|
+
* # Returns
|
|
960
|
+
*
|
|
961
|
+
* A base32hex-encoded ELID string.
|
|
962
|
+
*
|
|
963
|
+
* # JavaScript Example
|
|
964
|
+
*
|
|
965
|
+
* ```javascript
|
|
966
|
+
* import { encodeElidCompressed } from 'elid';
|
|
967
|
+
*
|
|
968
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
969
|
+
*
|
|
970
|
+
* // 50% retention - good balance of size and fidelity
|
|
971
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
972
|
+
*
|
|
973
|
+
* // 25% retention - smaller but less accurate
|
|
974
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
975
|
+
* ```
|
|
976
|
+
* @param {Float64Array} embedding
|
|
977
|
+
* @param {number} retention_pct
|
|
978
|
+
* @returns {string}
|
|
979
|
+
*/
|
|
980
|
+
export function encodeElidCompressed(embedding, retention_pct) {
|
|
981
|
+
let deferred3_0;
|
|
982
|
+
let deferred3_1;
|
|
983
|
+
try {
|
|
984
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
985
|
+
const len0 = WASM_VECTOR_LEN;
|
|
986
|
+
const ret = wasm.encodeElidCompressed(ptr0, len0, retention_pct);
|
|
987
|
+
var ptr2 = ret[0];
|
|
988
|
+
var len2 = ret[1];
|
|
989
|
+
if (ret[3]) {
|
|
990
|
+
ptr2 = 0; len2 = 0;
|
|
991
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
992
|
+
}
|
|
993
|
+
deferred3_0 = ptr2;
|
|
994
|
+
deferred3_1 = len2;
|
|
995
|
+
return getStringFromWasm0(ptr2, len2);
|
|
996
|
+
} finally {
|
|
997
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
998
|
+
}
|
|
999
|
+
}
|
|
1000
|
+
|
|
1001
|
+
/**
|
|
1002
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
1003
|
+
*
|
|
1004
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
1005
|
+
* the specified character limit while maximizing fidelity.
|
|
1006
|
+
*
|
|
1007
|
+
* # Parameters
|
|
1008
|
+
*
|
|
1009
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
1010
|
+
* - `max_chars`: Maximum output string length in characters
|
|
1011
|
+
*
|
|
1012
|
+
* # Returns
|
|
1013
|
+
*
|
|
1014
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
1015
|
+
*
|
|
1016
|
+
* # JavaScript Example
|
|
1017
|
+
*
|
|
1018
|
+
* ```javascript
|
|
1019
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
1020
|
+
*
|
|
1021
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1022
|
+
*
|
|
1023
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
1024
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
1025
|
+
* console.log(elid.length <= 100); // true
|
|
1026
|
+
*
|
|
1027
|
+
* // Fit in 50 characters (more compression)
|
|
1028
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
1029
|
+
* ```
|
|
1030
|
+
* @param {Float64Array} embedding
|
|
1031
|
+
* @param {number} max_chars
|
|
1032
|
+
* @returns {string}
|
|
1033
|
+
*/
|
|
1034
|
+
export function encodeElidMaxLength(embedding, max_chars) {
|
|
1035
|
+
let deferred3_0;
|
|
1036
|
+
let deferred3_1;
|
|
1037
|
+
try {
|
|
1038
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
1039
|
+
const len0 = WASM_VECTOR_LEN;
|
|
1040
|
+
const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
|
|
1041
|
+
var ptr2 = ret[0];
|
|
1042
|
+
var len2 = ret[1];
|
|
1043
|
+
if (ret[3]) {
|
|
1044
|
+
ptr2 = 0; len2 = 0;
|
|
1045
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
1046
|
+
}
|
|
1047
|
+
deferred3_0 = ptr2;
|
|
1048
|
+
deferred3_1 = len2;
|
|
1049
|
+
return getStringFromWasm0(ptr2, len2);
|
|
1050
|
+
} finally {
|
|
1051
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
/**
|
|
1056
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
565
1057
|
*
|
|
566
1058
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
567
|
-
*
|
|
1059
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
568
1060
|
*
|
|
569
1061
|
* # JavaScript Example
|
|
570
1062
|
*
|
|
571
1063
|
* ```javascript
|
|
572
|
-
* import {
|
|
1064
|
+
* import { jaroWinkler } from 'elid';
|
|
573
1065
|
*
|
|
574
|
-
* const similarity =
|
|
575
|
-
* console.log(similarity); // ~0.
|
|
1066
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
1067
|
+
* console.log(similarity); // ~0.961
|
|
576
1068
|
* ```
|
|
577
1069
|
* @param {string} a
|
|
578
1070
|
* @param {string} b
|
|
579
1071
|
* @returns {number}
|
|
580
1072
|
*/
|
|
581
|
-
export function
|
|
1073
|
+
export function jaroWinkler(a, b) {
|
|
582
1074
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
583
1075
|
const len0 = WASM_VECTOR_LEN;
|
|
584
1076
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
585
1077
|
const len1 = WASM_VECTOR_LEN;
|
|
586
|
-
const ret = wasm.
|
|
1078
|
+
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
587
1079
|
return ret;
|
|
588
1080
|
}
|
|
589
1081
|
|
|
590
1082
|
/**
|
|
591
|
-
*
|
|
1083
|
+
* Compute the Hamming distance between two strings.
|
|
592
1084
|
*
|
|
593
|
-
* Returns
|
|
1085
|
+
* Returns the number of positions at which the characters differ.
|
|
1086
|
+
* Returns null if strings have different lengths.
|
|
594
1087
|
*
|
|
595
1088
|
* # JavaScript Example
|
|
596
1089
|
*
|
|
597
1090
|
* ```javascript
|
|
598
|
-
* import {
|
|
1091
|
+
* import { hamming } from 'elid';
|
|
599
1092
|
*
|
|
600
|
-
* const
|
|
601
|
-
*
|
|
602
|
-
*
|
|
1093
|
+
* const distance = hamming("karolin", "kathrin");
|
|
1094
|
+
* console.log(distance); // 3
|
|
1095
|
+
*
|
|
1096
|
+
* const invalid = hamming("hello", "world!");
|
|
1097
|
+
* console.log(invalid); // null
|
|
603
1098
|
* ```
|
|
604
|
-
* @param {string}
|
|
605
|
-
* @param {string
|
|
606
|
-
* @returns {
|
|
1099
|
+
* @param {string} a
|
|
1100
|
+
* @param {string} b
|
|
1101
|
+
* @returns {number | undefined}
|
|
607
1102
|
*/
|
|
608
|
-
export function
|
|
609
|
-
const ptr0 = passStringToWasm0(
|
|
1103
|
+
export function hamming(a, b) {
|
|
1104
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
610
1105
|
const len0 = WASM_VECTOR_LEN;
|
|
611
|
-
const ptr1 =
|
|
1106
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
612
1107
|
const len1 = WASM_VECTOR_LEN;
|
|
613
|
-
const ret = wasm.
|
|
614
|
-
return ret;
|
|
1108
|
+
const ret = wasm.hamming(ptr0, len0, ptr1, len1);
|
|
1109
|
+
return ret === 0x100000001 ? undefined : ret;
|
|
615
1110
|
}
|
|
616
1111
|
|
|
1112
|
+
/**
|
|
1113
|
+
* Dimension handling mode for full vector encoding.
|
|
1114
|
+
*
|
|
1115
|
+
* Controls whether to preserve original dimensions, reduce them,
|
|
1116
|
+
* or project to a common space for cross-dimensional comparison.
|
|
1117
|
+
*
|
|
1118
|
+
* # JavaScript Example
|
|
1119
|
+
*
|
|
1120
|
+
* ```javascript
|
|
1121
|
+
* import { ElidDimensionMode, encodeElidFullVector } from 'elid';
|
|
1122
|
+
*
|
|
1123
|
+
* // Preserve all dimensions
|
|
1124
|
+
* // Reduce to fewer dimensions for smaller output
|
|
1125
|
+
* // Common space for comparing different-sized embeddings
|
|
1126
|
+
* ```
|
|
1127
|
+
* @enum {0 | 1 | 2}
|
|
1128
|
+
*/
|
|
1129
|
+
export const ElidDimensionMode = Object.freeze({
|
|
1130
|
+
/**
|
|
1131
|
+
* Preserve all original dimensions (no projection)
|
|
1132
|
+
*/
|
|
1133
|
+
Preserve: 0, "0": "Preserve",
|
|
1134
|
+
/**
|
|
1135
|
+
* Reduce dimensions using random projection
|
|
1136
|
+
*/
|
|
1137
|
+
Reduce: 1, "1": "Reduce",
|
|
1138
|
+
/**
|
|
1139
|
+
* Project to common space for cross-dimensional comparison
|
|
1140
|
+
*/
|
|
1141
|
+
Common: 2, "2": "Common",
|
|
1142
|
+
});
|
|
1143
|
+
/**
|
|
1144
|
+
* ELID encoding profile for vector embeddings.
|
|
1145
|
+
*
|
|
1146
|
+
* Choose a profile based on your use case:
|
|
1147
|
+
* - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
|
|
1148
|
+
* - `Morton10x10`: Z-order curve encoding, good for range queries
|
|
1149
|
+
* - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
|
|
1150
|
+
*
|
|
1151
|
+
* # JavaScript Example
|
|
1152
|
+
*
|
|
1153
|
+
* ```javascript
|
|
1154
|
+
* import { ElidProfile, encodeElid } from 'elid';
|
|
1155
|
+
*
|
|
1156
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1157
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
1158
|
+
* ```
|
|
1159
|
+
* @enum {0 | 1 | 2}
|
|
1160
|
+
*/
|
|
1161
|
+
export const ElidProfile = Object.freeze({
|
|
1162
|
+
/**
|
|
1163
|
+
* 128-bit SimHash (cosine similarity via Hamming distance)
|
|
1164
|
+
*/
|
|
1165
|
+
Mini128: 0, "0": "Mini128",
|
|
1166
|
+
/**
|
|
1167
|
+
* Morton/Z-order curve encoding (10 dims, 10 bits each)
|
|
1168
|
+
*/
|
|
1169
|
+
Morton10x10: 1, "1": "Morton10x10",
|
|
1170
|
+
/**
|
|
1171
|
+
* Hilbert curve encoding (10 dims, 10 bits each)
|
|
1172
|
+
*/
|
|
1173
|
+
Hilbert10x10: 2, "2": "Hilbert10x10",
|
|
1174
|
+
});
|
|
1175
|
+
/**
|
|
1176
|
+
* Precision options for full vector encoding.
|
|
1177
|
+
*
|
|
1178
|
+
* Controls how many bits are used to represent each dimension value.
|
|
1179
|
+
* Higher precision means more accurate reconstruction but larger output.
|
|
1180
|
+
*
|
|
1181
|
+
* # JavaScript Example
|
|
1182
|
+
*
|
|
1183
|
+
* ```javascript
|
|
1184
|
+
* import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
|
|
1185
|
+
*
|
|
1186
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1187
|
+
* // Full32 = lossless, Half16 = smaller with minimal error
|
|
1188
|
+
* ```
|
|
1189
|
+
* @enum {0 | 1 | 2}
|
|
1190
|
+
*/
|
|
1191
|
+
export const ElidVectorPrecision = Object.freeze({
|
|
1192
|
+
/**
|
|
1193
|
+
* Full 32-bit float (lossless, 4 bytes per dimension)
|
|
1194
|
+
*/
|
|
1195
|
+
Full32: 0, "0": "Full32",
|
|
1196
|
+
/**
|
|
1197
|
+
* 16-bit half-precision float (2 bytes per dimension)
|
|
1198
|
+
*/
|
|
1199
|
+
Half16: 1, "1": "Half16",
|
|
1200
|
+
/**
|
|
1201
|
+
* 8-bit quantized (1 byte per dimension, ~1% error)
|
|
1202
|
+
*/
|
|
1203
|
+
Quant8: 2, "2": "Quant8",
|
|
1204
|
+
});
|
|
1205
|
+
|
|
617
1206
|
const SimilarityOptionsFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
618
1207
|
? { register: () => {}, unregister: () => {} }
|
|
619
1208
|
: new FinalizationRegistry(ptr => wasm.__wbg_similarityoptions_free(ptr >>> 0, 1));
|
|
@@ -757,6 +1346,11 @@ export function __wbg_new_e17d9f43105b08be() {
|
|
|
757
1346
|
return ret;
|
|
758
1347
|
};
|
|
759
1348
|
|
|
1349
|
+
export function __wbg_new_with_length_cd045ed0a87d4dd6(arg0) {
|
|
1350
|
+
const ret = new Float64Array(arg0 >>> 0);
|
|
1351
|
+
return ret;
|
|
1352
|
+
};
|
|
1353
|
+
|
|
760
1354
|
export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
|
|
761
1355
|
arg0[arg1] = arg2;
|
|
762
1356
|
};
|
|
@@ -775,6 +1369,10 @@ export function __wbg_set_c2abbebe8b9ebee1() { return handleError(function (arg0
|
|
|
775
1369
|
return ret;
|
|
776
1370
|
}, arguments) };
|
|
777
1371
|
|
|
1372
|
+
export function __wbg_set_index_a0c01b257dd824f8(arg0, arg1, arg2) {
|
|
1373
|
+
arg0[arg1 >>> 0] = arg2;
|
|
1374
|
+
};
|
|
1375
|
+
|
|
778
1376
|
export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
|
|
779
1377
|
// Cast intrinsic for `Ref(String) -> Externref`.
|
|
780
1378
|
const ret = getStringFromWasm0(arg0, arg1);
|