elid 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/elid_bg.js CHANGED
@@ -180,92 +180,6 @@ function handleError(f, args) {
180
180
  wasm.__wbindgen_exn_store(idx);
181
181
  }
182
182
  }
183
- /**
184
- * Compute the normalized SimHash similarity between two strings.
185
- *
186
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
187
- *
188
- * # JavaScript Example
189
- *
190
- * ```javascript
191
- * import { simhashSimilarity } from 'elid';
192
- *
193
- * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
194
- * console.log(similarity); // ~0.9 (very similar)
195
- *
196
- * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
197
- * console.log(similarity2); // ~0.4 (different)
198
- * ```
199
- * @param {string} a
200
- * @param {string} b
201
- * @returns {number}
202
- */
203
- export function simhashSimilarity(a, b) {
204
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
205
- const len0 = WASM_VECTOR_LEN;
206
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
207
- const len1 = WASM_VECTOR_LEN;
208
- const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
209
- return ret;
210
- }
211
-
212
- /**
213
- * Compute the Jaro-Winkler similarity between two strings.
214
- *
215
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
216
- * Gives more favorable ratings to strings with common prefixes.
217
- *
218
- * # JavaScript Example
219
- *
220
- * ```javascript
221
- * import { jaroWinkler } from 'elid';
222
- *
223
- * const similarity = jaroWinkler("martha", "marhta");
224
- * console.log(similarity); // ~0.961
225
- * ```
226
- * @param {string} a
227
- * @param {string} b
228
- * @returns {number}
229
- */
230
- export function jaroWinkler(a, b) {
231
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
232
- const len0 = WASM_VECTOR_LEN;
233
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
234
- const len1 = WASM_VECTOR_LEN;
235
- const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
236
- return ret;
237
- }
238
-
239
- /**
240
- * Compute the Hamming distance between two strings.
241
- *
242
- * Returns the number of positions at which the characters differ.
243
- * Returns null if strings have different lengths.
244
- *
245
- * # JavaScript Example
246
- *
247
- * ```javascript
248
- * import { hamming } from 'elid';
249
- *
250
- * const distance = hamming("karolin", "kathrin");
251
- * console.log(distance); // 3
252
- *
253
- * const invalid = hamming("hello", "world!");
254
- * console.log(invalid); // null
255
- * ```
256
- * @param {string} a
257
- * @param {string} b
258
- * @returns {number | undefined}
259
- */
260
- export function hamming(a, b) {
261
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
262
- const len0 = WASM_VECTOR_LEN;
263
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
264
- const len1 = WASM_VECTOR_LEN;
265
- const ret = wasm.hamming(ptr0, len0, ptr1, len1);
266
- return ret === 0x100000001 ? undefined : ret;
267
- }
268
-
269
183
  /**
270
184
  * Compute the SimHash fingerprint of a string.
271
185
  *
@@ -297,30 +211,41 @@ export function simhash(text) {
297
211
  return ret;
298
212
  }
299
213
 
214
+ function passArrayJsValueToWasm0(array, malloc) {
215
+ const ptr = malloc(array.length * 4, 4) >>> 0;
216
+ for (let i = 0; i < array.length; i++) {
217
+ const add = addToExternrefTable0(array[i]);
218
+ getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
219
+ }
220
+ WASM_VECTOR_LEN = array.length;
221
+ return ptr;
222
+ }
300
223
  /**
301
- * Compute the Levenshtein distance between two strings.
224
+ * Find all matches above a threshold score.
302
225
  *
303
- * Returns the minimum number of single-character edits needed to transform one string into another.
226
+ * Returns an array of objects with index and score for all candidates above the threshold.
304
227
  *
305
228
  * # JavaScript Example
306
229
  *
307
230
  * ```javascript
308
- * import { levenshtein } from 'elid';
231
+ * import { findMatchesAboveThreshold } from 'elid';
309
232
  *
310
- * const distance = levenshtein("kitten", "sitting");
311
- * console.log(distance); // 3
233
+ * const candidates = ["apple", "application", "apply", "banana"];
234
+ * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
235
+ * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
312
236
  * ```
313
- * @param {string} a
314
- * @param {string} b
315
- * @returns {number}
237
+ * @param {string} query
238
+ * @param {string[]} candidates
239
+ * @param {number} threshold
240
+ * @returns {any}
316
241
  */
317
- export function levenshtein(a, b) {
318
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
242
+ export function findMatchesAboveThreshold(query, candidates, threshold) {
243
+ const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
319
244
  const len0 = WASM_VECTOR_LEN;
320
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
245
+ const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
321
246
  const len1 = WASM_VECTOR_LEN;
322
- const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
323
- return ret >>> 0;
247
+ const ret = wasm.findMatchesAboveThreshold(ptr0, len0, ptr1, len1, threshold);
248
+ return ret;
324
249
  }
325
250
 
326
251
  let cachedFloat64ArrayMemory0 = null;
@@ -339,6 +264,69 @@ function passArrayF64ToWasm0(arg, malloc) {
339
264
  return ptr;
340
265
  }
341
266
 
267
+ function takeFromExternrefTable0(idx) {
268
+ const value = wasm.__wbindgen_externrefs.get(idx);
269
+ wasm.__externref_table_dealloc(idx);
270
+ return value;
271
+ }
272
+ /**
273
+ * Encode an embedding for cross-dimensional comparison.
274
+ *
275
+ * Projects the embedding to a common dimension space, allowing comparison
276
+ * between embeddings of different original dimensions (e.g., 256d vs 768d).
277
+ *
278
+ * # Parameters
279
+ *
280
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
281
+ * - `common_dims`: Target dimension space (all vectors projected here)
282
+ *
283
+ * # Returns
284
+ *
285
+ * A base32hex-encoded ELID string.
286
+ *
287
+ * # JavaScript Example
288
+ *
289
+ * ```javascript
290
+ * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
291
+ *
292
+ * // Different sized embeddings from different models
293
+ * const embedding256 = new Float64Array(256).fill(0.1);
294
+ * const embedding768 = new Float64Array(768).fill(0.1);
295
+ *
296
+ * // Project both to 128-dim common space
297
+ * const elid1 = encodeElidCrossDimensional(embedding256, 128);
298
+ * const elid2 = encodeElidCrossDimensional(embedding768, 128);
299
+ *
300
+ * // Now they can be compared directly (both decode to 128 dims)
301
+ * const dec1 = decodeElidToEmbedding(elid1);
302
+ * const dec2 = decodeElidToEmbedding(elid2);
303
+ * // Both have length 128
304
+ * ```
305
+ * @param {Float64Array} embedding
306
+ * @param {number} common_dims
307
+ * @returns {string}
308
+ */
309
+ export function encodeElidCrossDimensional(embedding, common_dims) {
310
+ let deferred3_0;
311
+ let deferred3_1;
312
+ try {
313
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
314
+ const len0 = WASM_VECTOR_LEN;
315
+ const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
316
+ var ptr2 = ret[0];
317
+ var len2 = ret[1];
318
+ if (ret[3]) {
319
+ ptr2 = 0; len2 = 0;
320
+ throw takeFromExternrefTable0(ret[2]);
321
+ }
322
+ deferred3_0 = ptr2;
323
+ deferred3_1 = len2;
324
+ return getStringFromWasm0(ptr2, len2);
325
+ } finally {
326
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
327
+ }
328
+ }
329
+
342
330
  let cachedUint32ArrayMemory0 = null;
343
331
 
344
332
  function getUint32ArrayMemory0() {
@@ -384,93 +372,409 @@ export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
384
372
  return v2;
385
373
  }
386
374
 
387
- function passArrayJsValueToWasm0(array, malloc) {
388
- const ptr = malloc(array.length * 4, 4) >>> 0;
389
- for (let i = 0; i < array.length; i++) {
390
- const add = addToExternrefTable0(array[i]);
391
- getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
375
+ /**
376
+ * Compute the Jaro similarity between two strings.
377
+ *
378
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
379
+ * Particularly effective for short strings like names.
380
+ *
381
+ * # JavaScript Example
382
+ *
383
+ * ```javascript
384
+ * import { jaro } from 'elid';
385
+ *
386
+ * const similarity = jaro("martha", "marhta");
387
+ * console.log(similarity); // ~0.944
388
+ * ```
389
+ * @param {string} a
390
+ * @param {string} b
391
+ * @returns {number}
392
+ */
393
+ export function jaro(a, b) {
394
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
395
+ const len0 = WASM_VECTOR_LEN;
396
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
397
+ const len1 = WASM_VECTOR_LEN;
398
+ const ret = wasm.jaro(ptr0, len0, ptr1, len1);
399
+ return ret;
400
+ }
401
+
402
+ /**
403
+ * Compute the best matching similarity between two strings.
404
+ *
405
+ * Runs multiple algorithms and returns the highest score.
406
+ *
407
+ * # JavaScript Example
408
+ *
409
+ * ```javascript
410
+ * import { bestMatch } from 'elid';
411
+ *
412
+ * const score = bestMatch("hello", "hallo");
413
+ * console.log(score); // ~0.8
414
+ * ```
415
+ * @param {string} a
416
+ * @param {string} b
417
+ * @returns {number}
418
+ */
419
+ export function bestMatch(a, b) {
420
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
421
+ const len0 = WASM_VECTOR_LEN;
422
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
423
+ const len1 = WASM_VECTOR_LEN;
424
+ const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
425
+ return ret;
426
+ }
427
+
428
+ /**
429
+ * Encode an embedding vector to an ELID string.
430
+ *
431
+ * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
432
+ * sortable identifier. The ELID preserves locality properties for efficient
433
+ * similarity search.
434
+ *
435
+ * # Parameters
436
+ *
437
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
438
+ * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
439
+ *
440
+ * # Returns
441
+ *
442
+ * A base32hex-encoded ELID string suitable for storage and comparison.
443
+ *
444
+ * # JavaScript Example
445
+ *
446
+ * ```javascript
447
+ * import { encodeElid, ElidProfile } from 'elid';
448
+ *
449
+ * // OpenAI embeddings are 1536 dimensions
450
+ * const embedding = await getEmbedding("Hello world");
451
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
452
+ * console.log(elid); // "012345abcdef..."
453
+ * ```
454
+ * @param {Float64Array} embedding
455
+ * @param {ElidProfile} profile
456
+ * @returns {string}
457
+ */
458
+ export function encodeElid(embedding, profile) {
459
+ let deferred3_0;
460
+ let deferred3_1;
461
+ try {
462
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
463
+ const len0 = WASM_VECTOR_LEN;
464
+ const ret = wasm.encodeElid(ptr0, len0, profile);
465
+ var ptr2 = ret[0];
466
+ var len2 = ret[1];
467
+ if (ret[3]) {
468
+ ptr2 = 0; len2 = 0;
469
+ throw takeFromExternrefTable0(ret[2]);
470
+ }
471
+ deferred3_0 = ptr2;
472
+ deferred3_1 = len2;
473
+ return getStringFromWasm0(ptr2, len2);
474
+ } finally {
475
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
392
476
  }
393
- WASM_VECTOR_LEN = array.length;
394
- return ptr;
395
477
  }
478
+
396
479
  /**
397
- * Find all matches above a threshold score.
480
+ * Compute the normalized SimHash similarity between two strings.
481
+ *
482
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
483
+ *
484
+ * # JavaScript Example
485
+ *
486
+ * ```javascript
487
+ * import { simhashSimilarity } from 'elid';
488
+ *
489
+ * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
490
+ * console.log(similarity); // ~0.9 (very similar)
491
+ *
492
+ * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
493
+ * console.log(similarity2); // ~0.4 (different)
494
+ * ```
495
+ * @param {string} a
496
+ * @param {string} b
497
+ * @returns {number}
498
+ */
499
+ export function simhashSimilarity(a, b) {
500
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
501
+ const len0 = WASM_VECTOR_LEN;
502
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
503
+ const len1 = WASM_VECTOR_LEN;
504
+ const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
505
+ return ret;
506
+ }
507
+
508
+ /**
509
+ * Encode an embedding using lossless full vector encoding.
510
+ *
511
+ * Preserves the exact embedding values (32-bit float precision) and all dimensions.
512
+ * This produces the largest output but allows exact reconstruction.
513
+ *
514
+ * # Parameters
515
+ *
516
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
517
+ *
518
+ * # Returns
519
+ *
520
+ * A base32hex-encoded ELID string that can be decoded back to the original embedding.
521
+ *
522
+ * # JavaScript Example
523
+ *
524
+ * ```javascript
525
+ * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
526
+ *
527
+ * const embedding = new Float64Array(768).fill(0.1);
528
+ * const elid = encodeElidLossless(embedding);
529
+ *
530
+ * // Later, recover the exact embedding
531
+ * const recovered = decodeElidToEmbedding(elid);
532
+ * // recovered is identical to embedding
533
+ * ```
534
+ * @param {Float64Array} embedding
535
+ * @returns {string}
536
+ */
537
+ export function encodeElidLossless(embedding) {
538
+ let deferred3_0;
539
+ let deferred3_1;
540
+ try {
541
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
542
+ const len0 = WASM_VECTOR_LEN;
543
+ const ret = wasm.encodeElidLossless(ptr0, len0);
544
+ var ptr2 = ret[0];
545
+ var len2 = ret[1];
546
+ if (ret[3]) {
547
+ ptr2 = 0; len2 = 0;
548
+ throw takeFromExternrefTable0(ret[2]);
549
+ }
550
+ deferred3_0 = ptr2;
551
+ deferred3_1 = len2;
552
+ return getStringFromWasm0(ptr2, len2);
553
+ } finally {
554
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
555
+ }
556
+ }
557
+
558
+ /**
559
+ * Compute the normalized Levenshtein similarity between two strings.
560
+ *
561
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
562
+ *
563
+ * # JavaScript Example
564
+ *
565
+ * ```javascript
566
+ * import { normalizedLevenshtein } from 'elid';
567
+ *
568
+ * const similarity = normalizedLevenshtein("hello", "hallo");
569
+ * console.log(similarity); // ~0.8
570
+ * ```
571
+ * @param {string} a
572
+ * @param {string} b
573
+ * @returns {number}
574
+ */
575
+ export function normalizedLevenshtein(a, b) {
576
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
577
+ const len0 = WASM_VECTOR_LEN;
578
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
579
+ const len1 = WASM_VECTOR_LEN;
580
+ const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
581
+ return ret;
582
+ }
583
+
584
+ /**
585
+ * Decode an ELID string back to an embedding vector.
586
+ *
587
+ * Only works for ELIDs encoded with a FullVector profile (lossless,
588
+ * compressed, or max_length). Returns null for non-reversible profiles
589
+ * like Mini128, Morton, or Hilbert.
590
+ *
591
+ * # Parameters
592
+ *
593
+ * - `elid_str`: A valid ELID string (base32hex encoded)
594
+ *
595
+ * # Returns
596
+ *
597
+ * A Float64Array containing the decoded embedding, or null if the ELID
598
+ * is not reversible.
599
+ *
600
+ * Note: If dimension reduction was used during encoding, the decoded
601
+ * embedding will be in the reduced dimension space, not the original.
602
+ *
603
+ * # JavaScript Example
604
+ *
605
+ * ```javascript
606
+ * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
607
+ *
608
+ * const embedding = new Float64Array(768).fill(0.1);
609
+ * const elid = encodeElidLossless(embedding);
610
+ *
611
+ * if (isElidReversible(elid)) {
612
+ * const recovered = decodeElidToEmbedding(elid);
613
+ * console.log(recovered.length); // 768
614
+ * }
615
+ * ```
616
+ * @param {string} elid_str
617
+ * @returns {any}
618
+ */
619
+ export function decodeElidToEmbedding(elid_str) {
620
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
621
+ const len0 = WASM_VECTOR_LEN;
622
+ const ret = wasm.decodeElidToEmbedding(ptr0, len0);
623
+ if (ret[2]) {
624
+ throw takeFromExternrefTable0(ret[1]);
625
+ }
626
+ return takeFromExternrefTable0(ret[0]);
627
+ }
628
+
629
+ /**
630
+ * Get metadata about a FullVector ELID.
631
+ *
632
+ * Returns an object containing information about how the ELID was encoded,
633
+ * including original dimensions, precision, and dimension mode.
634
+ *
635
+ * # Parameters
636
+ *
637
+ * - `elid_str`: A valid ELID string (base32hex encoded)
638
+ *
639
+ * # Returns
640
+ *
641
+ * An object with metadata fields, or null if not a FullVector ELID.
642
+ *
643
+ * # JavaScript Example
644
+ *
645
+ * ```javascript
646
+ * import { encodeElidCompressed, getElidMetadata } from 'elid';
647
+ *
648
+ * const embedding = new Float64Array(768).fill(0.1);
649
+ * const elid = encodeElidCompressed(embedding, 0.5);
650
+ *
651
+ * const meta = getElidMetadata(elid);
652
+ * if (meta) {
653
+ * console.log(meta.originalDims); // 768
654
+ * console.log(meta.encodedDims); // depends on compression
655
+ * console.log(meta.isLossless); // false
656
+ * }
657
+ * ```
658
+ * @param {string} elid_str
659
+ * @returns {any}
660
+ */
661
+ export function getElidMetadata(elid_str) {
662
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
663
+ const len0 = WASM_VECTOR_LEN;
664
+ const ret = wasm.getElidMetadata(ptr0, len0);
665
+ if (ret[2]) {
666
+ throw takeFromExternrefTable0(ret[1]);
667
+ }
668
+ return takeFromExternrefTable0(ret[0]);
669
+ }
670
+
671
+ function getArrayU8FromWasm0(ptr, len) {
672
+ ptr = ptr >>> 0;
673
+ return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
674
+ }
675
+ /**
676
+ * Decode an ELID string to raw bytes.
677
+ *
678
+ * Returns the raw byte representation of an ELID, including the header
679
+ * and payload bytes. Useful for custom processing or debugging.
680
+ *
681
+ * # Parameters
682
+ *
683
+ * - `elid_str`: A valid ELID string (base32hex encoded)
398
684
  *
399
- * Returns an array of objects with index and score for all candidates above the threshold.
685
+ * # Returns
686
+ *
687
+ * A Uint8Array containing the raw bytes (header + payload).
400
688
  *
401
689
  * # JavaScript Example
402
690
  *
403
691
  * ```javascript
404
- * import { findMatchesAboveThreshold } from 'elid';
692
+ * import { decodeElid } from 'elid';
405
693
  *
406
- * const candidates = ["apple", "application", "apply", "banana"];
407
- * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
408
- * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
694
+ * const bytes = decodeElid("012345abcdef...");
695
+ * console.log(bytes); // Uint8Array [...]
409
696
  * ```
410
- * @param {string} query
411
- * @param {string[]} candidates
412
- * @param {number} threshold
413
- * @returns {any}
697
+ * @param {string} elid_str
698
+ * @returns {Uint8Array}
414
699
  */
415
- export function findMatchesAboveThreshold(query, candidates, threshold) {
416
- const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
700
+ export function decodeElid(elid_str) {
701
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
417
702
  const len0 = WASM_VECTOR_LEN;
418
- const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
419
- const len1 = WASM_VECTOR_LEN;
420
- const ret = wasm.findMatchesAboveThreshold(ptr0, len0, ptr1, len1, threshold);
421
- return ret;
703
+ const ret = wasm.decodeElid(ptr0, len0);
704
+ if (ret[3]) {
705
+ throw takeFromExternrefTable0(ret[2]);
706
+ }
707
+ var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
708
+ wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
709
+ return v2;
422
710
  }
423
711
 
424
712
  /**
425
- * Compute the OSA (Optimal String Alignment) distance between two strings.
713
+ * Compute the Levenshtein distance between two strings.
426
714
  *
427
- * Similar to Levenshtein but also considers transpositions as a single operation.
715
+ * Returns the minimum number of single-character edits needed to transform one string into another.
428
716
  *
429
717
  * # JavaScript Example
430
718
  *
431
719
  * ```javascript
432
- * import { osaDistance } from 'elid';
720
+ * import { levenshtein } from 'elid';
433
721
  *
434
- * const distance = osaDistance("ca", "ac");
435
- * console.log(distance); // 1 (transposition)
722
+ * const distance = levenshtein("kitten", "sitting");
723
+ * console.log(distance); // 3
436
724
  * ```
437
725
  * @param {string} a
438
726
  * @param {string} b
439
727
  * @returns {number}
440
728
  */
441
- export function osaDistance(a, b) {
729
+ export function levenshtein(a, b) {
442
730
  const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
443
731
  const len0 = WASM_VECTOR_LEN;
444
732
  const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
445
733
  const len1 = WASM_VECTOR_LEN;
446
- const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
734
+ const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
447
735
  return ret >>> 0;
448
736
  }
449
737
 
450
738
  /**
451
- * Compute the normalized Levenshtein similarity between two strings.
739
+ * Check if an ELID can be decoded back to an embedding.
452
740
  *
453
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
741
+ * Returns true if the ELID was encoded with a FullVector profile
742
+ * (lossless, compressed, or max_length), false otherwise.
743
+ *
744
+ * # Parameters
745
+ *
746
+ * - `elid_str`: A valid ELID string (base32hex encoded)
747
+ *
748
+ * # Returns
749
+ *
750
+ * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
454
751
  *
455
752
  * # JavaScript Example
456
753
  *
457
754
  * ```javascript
458
- * import { normalizedLevenshtein } from 'elid';
755
+ * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
459
756
  *
460
- * const similarity = normalizedLevenshtein("hello", "hallo");
461
- * console.log(similarity); // ~0.8
757
+ * const embedding = new Float64Array(768).fill(0.1);
758
+ *
759
+ * // Mini128 is NOT reversible
760
+ * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
761
+ * console.log(isElidReversible(mini128Elid)); // false
762
+ *
763
+ * // Lossless IS reversible
764
+ * const losslessElid = encodeElidLossless(embedding);
765
+ * console.log(isElidReversible(losslessElid)); // true
462
766
  * ```
463
- * @param {string} a
464
- * @param {string} b
465
- * @returns {number}
767
+ * @param {string} elid_str
768
+ * @returns {boolean}
466
769
  */
467
- export function normalizedLevenshtein(a, b) {
468
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
770
+ export function isElidReversible(elid_str) {
771
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
469
772
  const len0 = WASM_VECTOR_LEN;
470
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
471
- const len1 = WASM_VECTOR_LEN;
472
- const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
473
- return ret;
773
+ const ret = wasm.isElidReversible(ptr0, len0);
774
+ if (ret[2]) {
775
+ throw takeFromExternrefTable0(ret[1]);
776
+ }
777
+ return ret[0] !== 0;
474
778
  }
475
779
 
476
780
  function _assertClass(instance, klass) {
@@ -510,29 +814,105 @@ export function levenshteinWithOpts(a, b, opts) {
510
814
  }
511
815
 
512
816
  /**
513
- * Compute the best matching similarity between two strings.
817
+ * Compute the Hamming distance between two ELID strings.
514
818
  *
515
- * Runs multiple algorithms and returns the highest score.
819
+ * Returns the number of differing bits between two Mini128 ELIDs.
820
+ * This distance is proportional to the angular distance between the
821
+ * original embeddings (lower = more similar).
822
+ *
823
+ * # Requirements
824
+ *
825
+ * Both ELIDs must use the Mini128 profile.
826
+ *
827
+ * # Parameters
828
+ *
829
+ * - `elid1`: First ELID string
830
+ * - `elid2`: Second ELID string
831
+ *
832
+ * # Returns
833
+ *
834
+ * Hamming distance (0-128). 0 means identical, 128 means completely different.
516
835
  *
517
836
  * # JavaScript Example
518
837
  *
519
838
  * ```javascript
520
- * import { bestMatch } from 'elid';
839
+ * import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
521
840
  *
522
- * const score = bestMatch("hello", "hallo");
523
- * console.log(score); // ~0.8
841
+ * const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
842
+ * const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
843
+ *
844
+ * const distance = elidHammingDistance(elid1, elid2);
845
+ * if (distance < 20) {
846
+ * console.log("Very similar embeddings!");
847
+ * }
848
+ * ```
849
+ * @param {string} elid1
850
+ * @param {string} elid2
851
+ * @returns {number}
852
+ */
853
+ export function elidHammingDistance(elid1, elid2) {
854
+ const ptr0 = passStringToWasm0(elid1, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
855
+ const len0 = WASM_VECTOR_LEN;
856
+ const ptr1 = passStringToWasm0(elid2, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
857
+ const len1 = WASM_VECTOR_LEN;
858
+ const ret = wasm.elidHammingDistance(ptr0, len0, ptr1, len1);
859
+ if (ret[2]) {
860
+ throw takeFromExternrefTable0(ret[1]);
861
+ }
862
+ return ret[0] >>> 0;
863
+ }
864
+
865
+ /**
866
+ * Find the best match for a query string in an array of candidates.
867
+ *
868
+ * Returns an object with the index and similarity score of the best match.
869
+ *
870
+ * # JavaScript Example
871
+ *
872
+ * ```javascript
873
+ * import { findBestMatch } from 'elid';
874
+ *
875
+ * const candidates = ["apple", "application", "apply"];
876
+ * const result = findBestMatch("app", candidates);
877
+ * console.log(result); // { index: 0, score: 0.907 }
878
+ * ```
879
+ * @param {string} query
880
+ * @param {string[]} candidates
881
+ * @returns {object}
882
+ */
883
+ export function findBestMatch(query, candidates) {
884
+ const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
885
+ const len0 = WASM_VECTOR_LEN;
886
+ const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
887
+ const len1 = WASM_VECTOR_LEN;
888
+ const ret = wasm.findBestMatch(ptr0, len0, ptr1, len1);
889
+ return ret;
890
+ }
891
+
892
+ /**
893
+ * Compute the OSA (Optimal String Alignment) distance between two strings.
894
+ *
895
+ * Similar to Levenshtein but also considers transpositions as a single operation.
896
+ *
897
+ * # JavaScript Example
898
+ *
899
+ * ```javascript
900
+ * import { osaDistance } from 'elid';
901
+ *
902
+ * const distance = osaDistance("ca", "ac");
903
+ * console.log(distance); // 1 (transposition)
524
904
  * ```
525
905
  * @param {string} a
526
906
  * @param {string} b
527
907
  * @returns {number}
528
908
  */
529
- export function bestMatch(a, b) {
909
+ export function osaDistance(a, b) {
530
910
  const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
531
911
  const len0 = WASM_VECTOR_LEN;
532
912
  const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
533
913
  const len1 = WASM_VECTOR_LEN;
534
- const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
535
- return ret;
914
+ const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
915
+ return ret >>> 0;
536
916
  }
537
917
 
538
918
  /**
@@ -561,59 +941,268 @@ export function simhashDistance(hash1, hash2) {
561
941
  }
562
942
 
563
943
  /**
564
- * Compute the Jaro similarity between two strings.
944
+ * Encode an embedding with percentage-based compression.
945
+ *
946
+ * The retention percentage (0.0-1.0) controls how much information is preserved:
947
+ * - 1.0 = lossless (Full32 precision, all dimensions)
948
+ * - 0.5 = half precision and/or half dimensions
949
+ * - 0.25 = quarter precision and/or quarter dimensions
950
+ *
951
+ * The algorithm optimizes for dimension reduction first (which preserves
952
+ * more geometric relationships) before reducing precision.
953
+ *
954
+ * # Parameters
955
+ *
956
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
957
+ * - `retention_pct`: Information retention percentage (0.0-1.0)
958
+ *
959
+ * # Returns
960
+ *
961
+ * A base32hex-encoded ELID string.
962
+ *
963
+ * # JavaScript Example
964
+ *
965
+ * ```javascript
966
+ * import { encodeElidCompressed } from 'elid';
967
+ *
968
+ * const embedding = new Float64Array(768).fill(0.1);
969
+ *
970
+ * // 50% retention - good balance of size and fidelity
971
+ * const elid = encodeElidCompressed(embedding, 0.5);
972
+ *
973
+ * // 25% retention - smaller but less accurate
974
+ * const smallElid = encodeElidCompressed(embedding, 0.25);
975
+ * ```
976
+ * @param {Float64Array} embedding
977
+ * @param {number} retention_pct
978
+ * @returns {string}
979
+ */
980
+ export function encodeElidCompressed(embedding, retention_pct) {
981
+ let deferred3_0;
982
+ let deferred3_1;
983
+ try {
984
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
985
+ const len0 = WASM_VECTOR_LEN;
986
+ const ret = wasm.encodeElidCompressed(ptr0, len0, retention_pct);
987
+ var ptr2 = ret[0];
988
+ var len2 = ret[1];
989
+ if (ret[3]) {
990
+ ptr2 = 0; len2 = 0;
991
+ throw takeFromExternrefTable0(ret[2]);
992
+ }
993
+ deferred3_0 = ptr2;
994
+ deferred3_1 = len2;
995
+ return getStringFromWasm0(ptr2, len2);
996
+ } finally {
997
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
998
+ }
999
+ }
1000
+
1001
+ /**
1002
+ * Encode an embedding with a maximum output string length constraint.
1003
+ *
1004
+ * Calculates the optimal precision and dimension settings to fit within
1005
+ * the specified character limit while maximizing fidelity.
1006
+ *
1007
+ * # Parameters
1008
+ *
1009
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
1010
+ * - `max_chars`: Maximum output string length in characters
1011
+ *
1012
+ * # Returns
1013
+ *
1014
+ * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
1015
+ *
1016
+ * # JavaScript Example
1017
+ *
1018
+ * ```javascript
1019
+ * import { encodeElidMaxLength } from 'elid';
1020
+ *
1021
+ * const embedding = new Float64Array(768).fill(0.1);
1022
+ *
1023
+ * // Fit in 100 characters (e.g., for database column constraints)
1024
+ * const elid = encodeElidMaxLength(embedding, 100);
1025
+ * console.log(elid.length <= 100); // true
1026
+ *
1027
+ * // Fit in 50 characters (more compression)
1028
+ * const shortElid = encodeElidMaxLength(embedding, 50);
1029
+ * ```
1030
+ * @param {Float64Array} embedding
1031
+ * @param {number} max_chars
1032
+ * @returns {string}
1033
+ */
1034
+ export function encodeElidMaxLength(embedding, max_chars) {
1035
+ let deferred3_0;
1036
+ let deferred3_1;
1037
+ try {
1038
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
1039
+ const len0 = WASM_VECTOR_LEN;
1040
+ const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
1041
+ var ptr2 = ret[0];
1042
+ var len2 = ret[1];
1043
+ if (ret[3]) {
1044
+ ptr2 = 0; len2 = 0;
1045
+ throw takeFromExternrefTable0(ret[2]);
1046
+ }
1047
+ deferred3_0 = ptr2;
1048
+ deferred3_1 = len2;
1049
+ return getStringFromWasm0(ptr2, len2);
1050
+ } finally {
1051
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
1052
+ }
1053
+ }
1054
+
1055
+ /**
1056
+ * Compute the Jaro-Winkler similarity between two strings.
565
1057
  *
566
1058
  * Returns a value between 0.0 (completely different) and 1.0 (identical).
567
- * Particularly effective for short strings like names.
1059
+ * Gives more favorable ratings to strings with common prefixes.
568
1060
  *
569
1061
  * # JavaScript Example
570
1062
  *
571
1063
  * ```javascript
572
- * import { jaro } from 'elid';
1064
+ * import { jaroWinkler } from 'elid';
573
1065
  *
574
- * const similarity = jaro("martha", "marhta");
575
- * console.log(similarity); // ~0.944
1066
+ * const similarity = jaroWinkler("martha", "marhta");
1067
+ * console.log(similarity); // ~0.961
576
1068
  * ```
577
1069
  * @param {string} a
578
1070
  * @param {string} b
579
1071
  * @returns {number}
580
1072
  */
581
- export function jaro(a, b) {
1073
+ export function jaroWinkler(a, b) {
582
1074
  const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
583
1075
  const len0 = WASM_VECTOR_LEN;
584
1076
  const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
585
1077
  const len1 = WASM_VECTOR_LEN;
586
- const ret = wasm.jaro(ptr0, len0, ptr1, len1);
1078
+ const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
587
1079
  return ret;
588
1080
  }
589
1081
 
590
1082
  /**
591
- * Find the best match for a query string in an array of candidates.
1083
+ * Compute the Hamming distance between two strings.
592
1084
  *
593
- * Returns an object with the index and similarity score of the best match.
1085
+ * Returns the number of positions at which the characters differ.
1086
+ * Returns undefined if the strings have different lengths.
594
1087
  *
595
1088
  * # JavaScript Example
596
1089
  *
597
1090
  * ```javascript
598
- * import { findBestMatch } from 'elid';
1091
+ * import { hamming } from 'elid';
599
1092
  *
600
- * const candidates = ["apple", "application", "apply"];
601
- * const result = findBestMatch("app", candidates);
602
- * console.log(result); // { index: 0, score: 0.907 }
1093
+ * const distance = hamming("karolin", "kathrin");
1094
+ * console.log(distance); // 3
1095
+ *
1096
+ * const invalid = hamming("hello", "world!");
1097
+ * console.log(invalid); // undefined
603
1098
  * ```
604
- * @param {string} query
605
- * @param {string[]} candidates
606
- * @returns {object}
1099
+ * @param {string} a
1100
+ * @param {string} b
1101
+ * @returns {number | undefined}
607
1102
  */
608
- export function findBestMatch(query, candidates) {
609
- const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1103
+ export function hamming(a, b) {
1104
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
610
1105
  const len0 = WASM_VECTOR_LEN;
611
- const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
1106
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
612
1107
  const len1 = WASM_VECTOR_LEN;
613
- const ret = wasm.findBestMatch(ptr0, len0, ptr1, len1);
614
- return ret;
1108
+ const ret = wasm.hamming(ptr0, len0, ptr1, len1);
1109
+ return ret === 0x100000001 ? undefined : ret;
615
1110
  }
616
1111
 
1112
+ /**
1113
+ * Dimension handling mode for full vector encoding.
1114
+ *
1115
+ * Controls whether to preserve original dimensions, reduce them,
1116
+ * or project to a common space for cross-dimensional comparison.
1117
+ *
1118
+ * # JavaScript Example
1119
+ *
1120
+ * ```javascript
1121
+ * import { ElidDimensionMode, encodeElidFullVector } from 'elid';
1122
+ *
1123
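+ * // ElidDimensionMode.Preserve: keep all original dimensions (no projection)
1124
+ * // ElidDimensionMode.Reduce: reduce to fewer dimensions for smaller output
1125
+ * // ElidDimensionMode.Common: project to a common space for comparing different-sized embeddings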
1126
+ * ```
1127
+ * @enum {0 | 1 | 2}
1128
+ */
1129
+ export const ElidDimensionMode = Object.freeze({
1130
+ /**
1131
+ * Preserve all original dimensions (no projection)
1132
+ */
1133
+ Preserve: 0, "0": "Preserve",
1134
+ /**
1135
+ * Reduce dimensions using random projection
1136
+ */
1137
+ Reduce: 1, "1": "Reduce",
1138
+ /**
1139
+ * Project to common space for cross-dimensional comparison
1140
+ */
1141
+ Common: 2, "2": "Common",
1142
+ });
1143
+ /**
1144
+ * ELID encoding profile for vector embeddings.
1145
+ *
1146
+ * Choose a profile based on your use case:
1147
+ * - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
1148
+ * - `Morton10x10`: Z-order curve encoding, good for range queries
1149
+ * - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
1150
+ *
1151
+ * # JavaScript Example
1152
+ *
1153
+ * ```javascript
1154
+ * import { ElidProfile, encodeElid } from 'elid';
1155
+ *
1156
+ * const embedding = new Float64Array(768).fill(0.1);
1157
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
1158
+ * ```
1159
+ * @enum {0 | 1 | 2}
1160
+ */
1161
+ export const ElidProfile = Object.freeze({
1162
+ /**
1163
+ * 128-bit SimHash (cosine similarity via Hamming distance)
1164
+ */
1165
+ Mini128: 0, "0": "Mini128",
1166
+ /**
1167
+ * Morton/Z-order curve encoding (10 dims, 10 bits each)
1168
+ */
1169
+ Morton10x10: 1, "1": "Morton10x10",
1170
+ /**
1171
+ * Hilbert curve encoding (10 dims, 10 bits each)
1172
+ */
1173
+ Hilbert10x10: 2, "2": "Hilbert10x10",
1174
+ });
1175
+ /**
1176
+ * Precision options for full vector encoding.
1177
+ *
1178
+ * Controls how many bits are used to represent each dimension value.
1179
+ * Higher precision means more accurate reconstruction but larger output.
1180
+ *
1181
+ * # JavaScript Example
1182
+ *
1183
+ * ```javascript
1184
+ * import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
1185
+ *
1186
+ * const embedding = new Float64Array(768).fill(0.1);
1187
+ * // Full32 = lossless, Half16 = smaller with minimal error
1188
+ * ```
1189
+ * @enum {0 | 1 | 2}
1190
+ */
1191
+ export const ElidVectorPrecision = Object.freeze({
1192
+ /**
1193
+ * Full 32-bit float (lossless, 4 bytes per dimension)
1194
+ */
1195
+ Full32: 0, "0": "Full32",
1196
+ /**
1197
+ * 16-bit half-precision float (2 bytes per dimension)
1198
+ */
1199
+ Half16: 1, "1": "Half16",
1200
+ /**
1201
+ * 8-bit quantized (1 byte per dimension, ~1% error)
1202
+ */
1203
+ Quant8: 2, "2": "Quant8",
1204
+ });
1205
+
617
1206
  const SimilarityOptionsFinalization = (typeof FinalizationRegistry === 'undefined')
618
1207
  ? { register: () => {}, unregister: () => {} }
619
1208
  : new FinalizationRegistry(ptr => wasm.__wbg_similarityoptions_free(ptr >>> 0, 1));
@@ -757,6 +1346,11 @@ export function __wbg_new_e17d9f43105b08be() {
757
1346
  return ret;
758
1347
  };
759
1348
 
1349
+ export function __wbg_new_with_length_cd045ed0a87d4dd6(arg0) {
1350
+ const ret = new Float64Array(arg0 >>> 0);
1351
+ return ret;
1352
+ };
1353
+
760
1354
  export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
761
1355
  arg0[arg1] = arg2;
762
1356
  };
@@ -775,6 +1369,10 @@ export function __wbg_set_c2abbebe8b9ebee1() { return handleError(function (arg0
775
1369
  return ret;
776
1370
  }, arguments) };
777
1371
 
1372
+ export function __wbg_set_index_a0c01b257dd824f8(arg0, arg1, arg2) {
1373
+ arg0[arg1 >>> 0] = arg2;
1374
+ };
1375
+
778
1376
  export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
779
1377
  // Cast intrinsic for `Ref(String) -> Externref`.
780
1378
  const ret = getStringFromWasm0(arg0, arg1);