elid 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/elid.d.ts +265 -265
  2. package/elid_bg.js +467 -467
  3. package/elid_bg.wasm +0 -0
  4. package/package.json +1 -1
package/elid_bg.js CHANGED
@@ -181,55 +181,34 @@ function handleError(f, args) {
181
181
  }
182
182
  }
183
183
  /**
184
- * Compute the OSA (Optimal String Alignment) distance between two strings.
184
+ * Compute the SimHash fingerprint of a string.
185
185
  *
186
- * Similar to Levenshtein but also considers transpositions as a single operation.
186
+ * Returns a 64-bit hash where similar strings produce similar numbers.
187
+ * Use this for database queries by storing the hash and querying by numeric range.
187
188
  *
188
189
  * # JavaScript Example
189
190
  *
190
191
  * ```javascript
191
- * import { osaDistance } from 'elid';
192
- *
193
- * const distance = osaDistance("ca", "ac");
194
- * console.log(distance); // 1 (transposition)
195
- * ```
196
- * @param {string} a
197
- * @param {string} b
198
- * @returns {number}
199
- */
200
- export function osaDistance(a, b) {
201
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
202
- const len0 = WASM_VECTOR_LEN;
203
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
204
- const len1 = WASM_VECTOR_LEN;
205
- const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
206
- return ret >>> 0;
207
- }
208
-
209
- /**
210
- * Compute the Levenshtein distance between two strings.
211
- *
212
- * Returns the minimum number of single-character edits needed to transform one string into another.
192
+ * import { simhash } from 'elid';
213
193
  *
214
- * # JavaScript Example
194
+ * const hash1 = simhash("iPhone 14");
195
+ * const hash2 = simhash("iPhone 15");
196
+ * const hash3 = simhash("Galaxy S23");
215
197
  *
216
- * ```javascript
217
- * import { levenshtein } from 'elid';
198
+ * // hash1 and hash2 will be numerically close
199
+ * // hash3 will be numerically distant
218
200
  *
219
- * const distance = levenshtein("kitten", "sitting");
220
- * console.log(distance); // 3
201
+ * // Store in database as bigint:
202
+ * // { name: "iPhone 14", simhash: hash1 }
221
203
  * ```
222
- * @param {string} a
223
- * @param {string} b
204
+ * @param {string} text
224
205
  * @returns {number}
225
206
  */
226
- export function levenshtein(a, b) {
227
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
207
+ export function simhash(text) {
208
+ const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
228
209
  const len0 = WASM_VECTOR_LEN;
229
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
230
- const len1 = WASM_VECTOR_LEN;
231
- const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
232
- return ret >>> 0;
210
+ const ret = wasm.simhash(ptr0, len0);
211
+ return ret;
233
212
  }
234
213
 
235
214
  function passArrayJsValueToWasm0(array, malloc) {
@@ -269,31 +248,6 @@ export function findMatchesAboveThreshold(query, candidates, threshold) {
269
248
  return ret;
270
249
  }
271
250
 
272
- /**
273
- * Compute the Hamming distance between two SimHash values.
274
- *
275
- * Returns the number of differing bits. Lower values = higher similarity.
276
- *
277
- * # JavaScript Example
278
- *
279
- * ```javascript
280
- * import { simhash, simhashDistance } from 'elid';
281
- *
282
- * const hash1 = simhash("iPhone 14");
283
- * const hash2 = simhash("iPhone 15");
284
- * const distance = simhashDistance(hash1, hash2);
285
- *
286
- * console.log(distance); // Low number = similar
287
- * ```
288
- * @param {number} hash1
289
- * @param {number} hash2
290
- * @returns {number}
291
- */
292
- export function simhashDistance(hash1, hash2) {
293
- const ret = wasm.simhashDistance(hash1, hash2);
294
- return ret >>> 0;
295
- }
296
-
297
251
  let cachedFloat64ArrayMemory0 = null;
298
252
 
299
253
  function getFloat64ArrayMemory0() {
@@ -316,45 +270,49 @@ function takeFromExternrefTable0(idx) {
316
270
  return value;
317
271
  }
318
272
  /**
319
- * Encode an embedding with a maximum output string length constraint.
273
+ * Encode an embedding for cross-dimensional comparison.
320
274
  *
321
- * Calculates the optimal precision and dimension settings to fit within
322
- * the specified character limit while maximizing fidelity.
275
+ * Projects the embedding to a common dimension space, allowing comparison
276
+ * between embeddings of different original dimensions (e.g., 256d vs 768d).
323
277
  *
324
278
  * # Parameters
325
279
  *
326
280
  * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
327
- * - `max_chars`: Maximum output string length in characters
281
+ * - `common_dims`: Target dimension space (all vectors projected here)
328
282
  *
329
283
  * # Returns
330
284
  *
331
- * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
285
+ * A base32hex-encoded ELID string.
332
286
  *
333
287
  * # JavaScript Example
334
288
  *
335
289
  * ```javascript
336
- * import { encodeElidMaxLength } from 'elid';
290
+ * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
337
291
  *
338
- * const embedding = new Float64Array(768).fill(0.1);
292
+ * // Different sized embeddings from different models
293
+ * const embedding256 = new Float64Array(256).fill(0.1);
294
+ * const embedding768 = new Float64Array(768).fill(0.1);
339
295
  *
340
- * // Fit in 100 characters (e.g., for database column constraints)
341
- * const elid = encodeElidMaxLength(embedding, 100);
342
- * console.log(elid.length <= 100); // true
296
+ * // Project both to 128-dim common space
297
+ * const elid1 = encodeElidCrossDimensional(embedding256, 128);
298
+ * const elid2 = encodeElidCrossDimensional(embedding768, 128);
343
299
  *
344
- * // Fit in 50 characters (more compression)
345
- * const shortElid = encodeElidMaxLength(embedding, 50);
300
+ * // Now they can be compared directly (both decode to 128 dims)
301
+ * const dec1 = decodeElidToEmbedding(elid1);
302
+ * const dec2 = decodeElidToEmbedding(elid2);
303
+ * // Both have length 128
346
304
  * ```
347
305
  * @param {Float64Array} embedding
348
- * @param {number} max_chars
306
+ * @param {number} common_dims
349
307
  * @returns {string}
350
308
  */
351
- export function encodeElidMaxLength(embedding, max_chars) {
309
+ export function encodeElidCrossDimensional(embedding, common_dims) {
352
310
  let deferred3_0;
353
311
  let deferred3_1;
354
312
  try {
355
313
  const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
356
314
  const len0 = WASM_VECTOR_LEN;
357
- const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
315
+ const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
358
316
  var ptr2 = ret[0];
359
317
  var len2 = ret[1];
360
318
  if (ret[3]) {
@@ -369,154 +327,303 @@ export function encodeElidMaxLength(embedding, max_chars) {
369
327
  }
370
328
  }
371
329
 
372
- function _assertClass(instance, klass) {
373
- if (!(instance instanceof klass)) {
374
- throw new Error(`expected instance of ${klass.name}`);
330
+ let cachedUint32ArrayMemory0 = null;
331
+
332
+ function getUint32ArrayMemory0() {
333
+ if (cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0) {
334
+ cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
375
335
  }
336
+ return cachedUint32ArrayMemory0;
337
+ }
338
+
339
+ function getArrayU32FromWasm0(ptr, len) {
340
+ ptr = ptr >>> 0;
341
+ return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
376
342
  }
377
343
  /**
378
- * Compute Levenshtein distance with custom options.
344
+ * Find all hashes within a given distance threshold.
345
+ *
346
+ * Useful for database queries - pre-compute hashes, then find similar ones.
379
347
  *
380
348
  * # JavaScript Example
381
349
  *
382
350
  * ```javascript
383
- * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
351
+ * import { simhash, findSimilarHashes } from 'elid';
384
352
  *
385
- * const opts = new SimilarityOptions();
386
- * opts.setCaseSensitive(false);
387
- * opts.setTrimWhitespace(true);
353
+ * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
354
+ * const hashes = candidates.map(s => simhash(s));
388
355
  *
389
- * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
390
- * console.log(distance); // 0
356
+ * const queryHash = simhash("iPhone 14");
357
+ * const matches = findSimilarHashes(queryHash, hashes, 10);
358
+ *
359
+ * console.log(matches); // [0, 1] - indices of similar items
391
360
  * ```
392
- * @param {string} a
393
- * @param {string} b
394
- * @param {SimilarityOptions} opts
395
- * @returns {number}
361
+ * @param {number} query_hash
362
+ * @param {Float64Array} candidate_hashes
363
+ * @param {number} max_distance
364
+ * @returns {Uint32Array}
396
365
  */
397
- export function levenshteinWithOpts(a, b, opts) {
398
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
366
+ export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
367
+ const ptr0 = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
399
368
  const len0 = WASM_VECTOR_LEN;
400
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
401
- const len1 = WASM_VECTOR_LEN;
402
- _assertClass(opts, SimilarityOptions);
403
- var ptr2 = opts.__destroy_into_raw();
404
- const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
405
- return ret >>> 0;
369
+ const ret = wasm.findSimilarHashes(query_hash, ptr0, len0, max_distance);
370
+ var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
371
+ wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
372
+ return v2;
406
373
  }
407
374
 
408
375
  /**
409
- * Check if an ELID can be decoded back to an embedding.
410
- *
411
- * Returns true if the ELID was encoded with a FullVector profile
412
- * (lossless, compressed, or max_length), false otherwise.
413
- *
414
- * # Parameters
415
- *
416
- * - `elid_str`: A valid ELID string (base32hex encoded)
417
- *
418
- * # Returns
376
+ * Compute the Jaro similarity between two strings.
419
377
  *
420
- * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
378
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
379
+ * Particularly effective for short strings like names.
421
380
  *
422
381
  * # JavaScript Example
423
382
  *
424
383
  * ```javascript
425
- * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
426
- *
427
- * const embedding = new Float64Array(768).fill(0.1);
428
- *
429
- * // Mini128 is NOT reversible
430
- * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
431
- * console.log(isElidReversible(mini128Elid)); // false
384
+ * import { jaro } from 'elid';
432
385
  *
433
- * // Lossless IS reversible
434
- * const losslessElid = encodeElidLossless(embedding);
435
- * console.log(isElidReversible(losslessElid)); // true
386
+ * const similarity = jaro("martha", "marhta");
387
+ * console.log(similarity); // ~0.944
436
388
  * ```
437
- * @param {string} elid_str
438
- * @returns {boolean}
389
+ * @param {string} a
390
+ * @param {string} b
391
+ * @returns {number}
439
392
  */
440
- export function isElidReversible(elid_str) {
441
- const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
393
+ export function jaro(a, b) {
394
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
442
395
  const len0 = WASM_VECTOR_LEN;
443
- const ret = wasm.isElidReversible(ptr0, len0);
444
- if (ret[2]) {
445
- throw takeFromExternrefTable0(ret[1]);
446
- }
447
- return ret[0] !== 0;
396
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
397
+ const len1 = WASM_VECTOR_LEN;
398
+ const ret = wasm.jaro(ptr0, len0, ptr1, len1);
399
+ return ret;
448
400
  }
449
401
 
450
402
  /**
451
- * Compute the SimHash fingerprint of a string.
403
+ * Compute the best matching similarity between two strings.
452
404
  *
453
- * Returns a 64-bit hash where similar strings produce similar numbers.
454
- * Use this for database queries by storing the hash and querying by numeric range.
405
+ * Runs multiple algorithms and returns the highest score.
455
406
  *
456
407
  * # JavaScript Example
457
408
  *
458
409
  * ```javascript
459
- * import { simhash } from 'elid';
460
- *
461
- * const hash1 = simhash("iPhone 14");
462
- * const hash2 = simhash("iPhone 15");
463
- * const hash3 = simhash("Galaxy S23");
464
- *
465
- * // hash1 and hash2 will be numerically close
466
- * // hash3 will be numerically distant
410
+ * import { bestMatch } from 'elid';
467
411
  *
468
- * // Store in database as bigint:
469
- * // { name: "iPhone 14", simhash: hash1 }
412
+ * const score = bestMatch("hello", "hallo");
413
+ * console.log(score); // ~0.8
470
414
  * ```
471
- * @param {string} text
415
+ * @param {string} a
416
+ * @param {string} b
472
417
  * @returns {number}
473
418
  */
474
- export function simhash(text) {
475
- const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
419
+ export function bestMatch(a, b) {
420
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
476
421
  const len0 = WASM_VECTOR_LEN;
477
- const ret = wasm.simhash(ptr0, len0);
422
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
423
+ const len1 = WASM_VECTOR_LEN;
424
+ const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
478
425
  return ret;
479
426
  }
480
427
 
481
- function getArrayU8FromWasm0(ptr, len) {
482
- ptr = ptr >>> 0;
483
- return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
484
- }
485
428
  /**
486
- * Decode an ELID string to raw bytes.
429
+ * Encode an embedding vector to an ELID string.
487
430
  *
488
- * Returns the raw byte representation of an ELID, including the header
489
- * and payload bytes. Useful for custom processing or debugging.
431
+ * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
432
+ * sortable identifier. The ELID preserves locality properties for efficient
433
+ * similarity search.
490
434
  *
491
435
  * # Parameters
492
436
  *
493
- * - `elid_str`: A valid ELID string (base32hex encoded)
437
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
438
+ * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
494
439
  *
495
440
  * # Returns
496
441
  *
497
- * A Uint8Array containing the raw bytes (header + payload).
442
+ * A base32hex-encoded ELID string suitable for storage and comparison.
498
443
  *
499
444
  * # JavaScript Example
500
445
  *
501
446
  * ```javascript
502
- * import { decodeElid } from 'elid';
447
+ * import { encodeElid, ElidProfile } from 'elid';
503
448
  *
504
- * const bytes = decodeElid("012345abcdef...");
505
- * console.log(bytes); // Uint8Array [...]
449
+ * // OpenAI embeddings are 1536 dimensions
450
+ * const embedding = await getEmbedding("Hello world");
451
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
452
+ * console.log(elid); // "012345abcdef..."
453
+ * ```
454
+ * @param {Float64Array} embedding
455
+ * @param {ElidProfile} profile
456
+ * @returns {string}
457
+ */
458
+ export function encodeElid(embedding, profile) {
459
+ let deferred3_0;
460
+ let deferred3_1;
461
+ try {
462
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
463
+ const len0 = WASM_VECTOR_LEN;
464
+ const ret = wasm.encodeElid(ptr0, len0, profile);
465
+ var ptr2 = ret[0];
466
+ var len2 = ret[1];
467
+ if (ret[3]) {
468
+ ptr2 = 0; len2 = 0;
469
+ throw takeFromExternrefTable0(ret[2]);
470
+ }
471
+ deferred3_0 = ptr2;
472
+ deferred3_1 = len2;
473
+ return getStringFromWasm0(ptr2, len2);
474
+ } finally {
475
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
476
+ }
477
+ }
478
+
479
+ /**
480
+ * Compute the normalized SimHash similarity between two strings.
481
+ *
482
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
483
+ *
484
+ * # JavaScript Example
485
+ *
486
+ * ```javascript
487
+ * import { simhashSimilarity } from 'elid';
488
+ *
489
+ * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
490
+ * console.log(similarity); // ~0.9 (very similar)
491
+ *
492
+ * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
493
+ * console.log(similarity2); // ~0.4 (different)
494
+ * ```
495
+ * @param {string} a
496
+ * @param {string} b
497
+ * @returns {number}
498
+ */
499
+ export function simhashSimilarity(a, b) {
500
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
501
+ const len0 = WASM_VECTOR_LEN;
502
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
503
+ const len1 = WASM_VECTOR_LEN;
504
+ const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
505
+ return ret;
506
+ }
507
+
508
+ /**
509
+ * Encode an embedding using lossless full vector encoding.
510
+ *
511
+ * Preserves the exact embedding values (32-bit float precision) and all dimensions.
512
+ * This produces the largest output but allows exact reconstruction.
513
+ *
514
+ * # Parameters
515
+ *
516
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
517
+ *
518
+ * # Returns
519
+ *
520
+ * A base32hex-encoded ELID string that can be decoded back to the original embedding.
521
+ *
522
+ * # JavaScript Example
523
+ *
524
+ * ```javascript
525
+ * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
526
+ *
527
+ * const embedding = new Float64Array(768).fill(0.1);
528
+ * const elid = encodeElidLossless(embedding);
529
+ *
530
+ * // Later, recover the exact embedding
531
+ * const recovered = decodeElidToEmbedding(elid);
532
+ * // recovered is identical to embedding
533
+ * ```
534
+ * @param {Float64Array} embedding
535
+ * @returns {string}
536
+ */
537
+ export function encodeElidLossless(embedding) {
538
+ let deferred3_0;
539
+ let deferred3_1;
540
+ try {
541
+ const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
542
+ const len0 = WASM_VECTOR_LEN;
543
+ const ret = wasm.encodeElidLossless(ptr0, len0);
544
+ var ptr2 = ret[0];
545
+ var len2 = ret[1];
546
+ if (ret[3]) {
547
+ ptr2 = 0; len2 = 0;
548
+ throw takeFromExternrefTable0(ret[2]);
549
+ }
550
+ deferred3_0 = ptr2;
551
+ deferred3_1 = len2;
552
+ return getStringFromWasm0(ptr2, len2);
553
+ } finally {
554
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
555
+ }
556
+ }
557
+
558
+ /**
559
+ * Compute the normalized Levenshtein similarity between two strings.
560
+ *
561
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
562
+ *
563
+ * # JavaScript Example
564
+ *
565
+ * ```javascript
566
+ * import { normalizedLevenshtein } from 'elid';
567
+ *
568
+ * const similarity = normalizedLevenshtein("hello", "hallo");
569
+ * console.log(similarity); // ~0.8
570
+ * ```
571
+ * @param {string} a
572
+ * @param {string} b
573
+ * @returns {number}
574
+ */
575
+ export function normalizedLevenshtein(a, b) {
576
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
577
+ const len0 = WASM_VECTOR_LEN;
578
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
579
+ const len1 = WASM_VECTOR_LEN;
580
+ const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
581
+ return ret;
582
+ }
583
+
584
+ /**
585
+ * Decode an ELID string back to an embedding vector.
586
+ *
587
+ * Only works for ELIDs encoded with a FullVector profile (lossless,
588
+ * compressed, or max_length). Returns null for non-reversible profiles
589
+ * like Mini128, Morton, or Hilbert.
590
+ *
591
+ * # Parameters
592
+ *
593
+ * - `elid_str`: A valid ELID string (base32hex encoded)
594
+ *
595
+ * # Returns
596
+ *
597
+ * A Float64Array containing the decoded embedding, or null if the ELID
598
+ * is not reversible.
599
+ *
600
+ * Note: If dimension reduction was used during encoding, the decoded
601
+ * embedding will be in the reduced dimension space, not the original.
602
+ *
603
+ * # JavaScript Example
604
+ *
605
+ * ```javascript
606
+ * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
607
+ *
608
+ * const embedding = new Float64Array(768).fill(0.1);
609
+ * const elid = encodeElidLossless(embedding);
610
+ *
611
+ * if (isElidReversible(elid)) {
612
+ * const recovered = decodeElidToEmbedding(elid);
613
+ * console.log(recovered.length); // 768
614
+ * }
506
615
  * ```
507
616
  * @param {string} elid_str
508
- * @returns {Uint8Array}
617
+ * @returns {any}
509
618
  */
510
- export function decodeElid(elid_str) {
619
+ export function decodeElidToEmbedding(elid_str) {
511
620
  const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
512
621
  const len0 = WASM_VECTOR_LEN;
513
- const ret = wasm.decodeElid(ptr0, len0);
514
- if (ret[3]) {
515
- throw takeFromExternrefTable0(ret[2]);
622
+ const ret = wasm.decodeElidToEmbedding(ptr0, len0);
623
+ if (ret[2]) {
624
+ throw takeFromExternrefTable0(ret[1]);
516
625
  }
517
- var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
518
- wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
519
- return v2;
626
+ return takeFromExternrefTable0(ret[0]);
520
627
  }
521
628
 
522
629
  /**
@@ -561,62 +668,149 @@ export function getElidMetadata(elid_str) {
561
668
  return takeFromExternrefTable0(ret[0]);
562
669
  }
563
670
 
671
+ function getArrayU8FromWasm0(ptr, len) {
672
+ ptr = ptr >>> 0;
673
+ return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
674
+ }
564
675
  /**
565
- * Encode an embedding for cross-dimensional comparison.
676
+ * Decode an ELID string to raw bytes.
566
677
  *
567
- * Projects the embedding to a common dimension space, allowing comparison
568
- * between embeddings of different original dimensions (e.g., 256d vs 768d).
678
+ * Returns the raw byte representation of an ELID, including the header
679
+ * and payload bytes. Useful for custom processing or debugging.
569
680
  *
570
681
  * # Parameters
571
682
  *
572
- * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
573
- * - `common_dims`: Target dimension space (all vectors projected here)
683
+ * - `elid_str`: A valid ELID string (base32hex encoded)
574
684
  *
575
685
  * # Returns
576
686
  *
577
- * A base32hex-encoded ELID string.
687
+ * A Uint8Array containing the raw bytes (header + payload).
578
688
  *
579
689
  * # JavaScript Example
580
690
  *
581
691
  * ```javascript
582
- * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
692
+ * import { decodeElid } from 'elid';
583
693
  *
584
- * // Different sized embeddings from different models
585
- * const embedding256 = new Float64Array(256).fill(0.1);
586
- * const embedding768 = new Float64Array(768).fill(0.1);
694
+ * const bytes = decodeElid("012345abcdef...");
695
+ * console.log(bytes); // Uint8Array [...]
696
+ * ```
697
+ * @param {string} elid_str
698
+ * @returns {Uint8Array}
699
+ */
700
+ export function decodeElid(elid_str) {
701
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
702
+ const len0 = WASM_VECTOR_LEN;
703
+ const ret = wasm.decodeElid(ptr0, len0);
704
+ if (ret[3]) {
705
+ throw takeFromExternrefTable0(ret[2]);
706
+ }
707
+ var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
708
+ wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
709
+ return v2;
710
+ }
711
+
712
+ /**
713
+ * Compute the Levenshtein distance between two strings.
587
714
  *
588
- * // Project both to 128-dim common space
589
- * const elid1 = encodeElidCrossDimensional(embedding256, 128);
590
- * const elid2 = encodeElidCrossDimensional(embedding768, 128);
715
+ * Returns the minimum number of single-character edits needed to transform one string into another.
591
716
  *
592
- * // Now they can be compared directly (both decode to 128 dims)
593
- * const dec1 = decodeElidToEmbedding(elid1);
594
- * const dec2 = decodeElidToEmbedding(elid2);
595
- * // Both have length 128
717
+ * # JavaScript Example
718
+ *
719
+ * ```javascript
720
+ * import { levenshtein } from 'elid';
721
+ *
722
+ * const distance = levenshtein("kitten", "sitting");
723
+ * console.log(distance); // 3
596
724
  * ```
597
- * @param {Float64Array} embedding
598
- * @param {number} common_dims
599
- * @returns {string}
725
+ * @param {string} a
726
+ * @param {string} b
727
+ * @returns {number}
600
728
  */
601
- export function encodeElidCrossDimensional(embedding, common_dims) {
602
- let deferred3_0;
603
- let deferred3_1;
604
- try {
605
- const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
606
- const len0 = WASM_VECTOR_LEN;
607
- const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
608
- var ptr2 = ret[0];
609
- var len2 = ret[1];
610
- if (ret[3]) {
611
- ptr2 = 0; len2 = 0;
612
- throw takeFromExternrefTable0(ret[2]);
613
- }
614
- deferred3_0 = ptr2;
615
- deferred3_1 = len2;
616
- return getStringFromWasm0(ptr2, len2);
617
- } finally {
618
- wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
729
+ export function levenshtein(a, b) {
730
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
731
+ const len0 = WASM_VECTOR_LEN;
732
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
733
+ const len1 = WASM_VECTOR_LEN;
734
+ const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
735
+ return ret >>> 0;
736
+ }
737
+
738
+ /**
739
+ * Check if an ELID can be decoded back to an embedding.
740
+ *
741
+ * Returns true if the ELID was encoded with a FullVector profile
742
+ * (lossless, compressed, or max_length), false otherwise.
743
+ *
744
+ * # Parameters
745
+ *
746
+ * - `elid_str`: A valid ELID string (base32hex encoded)
747
+ *
748
+ * # Returns
749
+ *
750
+ * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
751
+ *
752
+ * # JavaScript Example
753
+ *
754
+ * ```javascript
755
+ * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
756
+ *
757
+ * const embedding = new Float64Array(768).fill(0.1);
758
+ *
759
+ * // Mini128 is NOT reversible
760
+ * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
761
+ * console.log(isElidReversible(mini128Elid)); // false
762
+ *
763
+ * // Lossless IS reversible
764
+ * const losslessElid = encodeElidLossless(embedding);
765
+ * console.log(isElidReversible(losslessElid)); // true
766
+ * ```
767
+ * @param {string} elid_str
768
+ * @returns {boolean}
769
+ */
770
+ export function isElidReversible(elid_str) {
771
+ const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
772
+ const len0 = WASM_VECTOR_LEN;
773
+ const ret = wasm.isElidReversible(ptr0, len0);
774
+ if (ret[2]) {
775
+ throw takeFromExternrefTable0(ret[1]);
619
776
  }
777
+ return ret[0] !== 0;
778
+ }
779
+
780
+ function _assertClass(instance, klass) {
781
+ if (!(instance instanceof klass)) {
782
+ throw new Error(`expected instance of ${klass.name}`);
783
+ }
784
+ }
785
+ /**
786
+ * Compute Levenshtein distance with custom options.
787
+ *
788
+ * # JavaScript Example
789
+ *
790
+ * ```javascript
791
+ * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
792
+ *
793
+ * const opts = new SimilarityOptions();
794
+ * opts.setCaseSensitive(false);
795
+ * opts.setTrimWhitespace(true);
796
+ *
797
+ * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
798
+ * console.log(distance); // 0
799
+ * ```
800
+ * @param {string} a
801
+ * @param {string} b
802
+ * @param {SimilarityOptions} opts
803
+ * @returns {number}
804
+ */
805
+ export function levenshteinWithOpts(a, b, opts) {
806
+ const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
807
+ const len0 = WASM_VECTOR_LEN;
808
+ const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
809
+ const len1 = WASM_VECTOR_LEN;
810
+ _assertClass(opts, SimilarityOptions);
811
+ var ptr2 = opts.__destroy_into_raw();
812
+ const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
813
+ return ret >>> 0;
620
814
  }
621
815
 
622
816
  /**
@@ -696,126 +890,100 @@ export function findBestMatch(query, candidates) {
696
890
  }
697
891
 
698
892
/**
 * Compute the OSA (Optimal String Alignment) distance between two strings.
 *
 * Similar to Levenshtein but also considers transpositions as a single operation.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { osaDistance } from 'elid';
 *
 * const distance = osaDistance("ca", "ac");
 * console.log(distance); // 1 (transposition)
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 */
export function osaDistance(a, b) {
    // Each string is marshalled as a (pointer, length) pair; the length is
    // read from WASM_VECTOR_LEN right after the corresponding call.
    const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len0 = WASM_VECTOR_LEN;
    const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len1 = WASM_VECTOR_LEN;
    const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
    return ret >>> 0;
}
755
917
 
756
918
/**
 * Compute the Hamming distance between two SimHash values.
 *
 * Returns the number of differing bits. Lower values = higher similarity.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { simhash, simhashDistance } from 'elid';
 *
 * const hash1 = simhash("iPhone 14");
 * const hash2 = simhash("iPhone 15");
 * const distance = simhashDistance(hash1, hash2);
 *
 * console.log(distance); // Low number = similar
 * ```
 * @param {number} hash1
 * @param {number} hash2
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 */
export function simhashDistance(hash1, hash2) {
    // Both hashes are forwarded to the wasm export unchanged.
    const ret = wasm.simhashDistance(hash1, hash2);
    return ret >>> 0;
}
782
942
 
783
943
  /**
784
- * Encode an embedding using lossless full vector encoding.
944
+ * Encode an embedding with percentage-based compression.
785
945
  *
786
- * Preserves the exact embedding values (32-bit float precision) and all dimensions.
787
- * This produces the largest output but allows exact reconstruction.
946
+ * The retention percentage (0.0-1.0) controls how much information is preserved:
947
+ * - 1.0 = lossless (Full32 precision, all dimensions)
948
+ * - 0.5 = half precision and/or half dimensions
949
+ * - 0.25 = quarter precision and/or quarter dimensions
950
+ *
951
+ * The algorithm optimizes for dimension reduction first (which preserves
952
+ * more geometric relationships) before reducing precision.
788
953
  *
789
954
  * # Parameters
790
955
  *
791
956
  * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
957
+ * - `retention_pct`: Information retention percentage (0.0-1.0)
792
958
  *
793
959
  * # Returns
794
960
  *
795
- * A base32hex-encoded ELID string that can be decoded back to the original embedding.
961
+ * A base32hex-encoded ELID string.
796
962
  *
797
963
  * # JavaScript Example
798
964
  *
799
965
  * ```javascript
800
- * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
966
+ * import { encodeElidCompressed } from 'elid';
801
967
  *
802
968
  * const embedding = new Float64Array(768).fill(0.1);
803
- * const elid = encodeElidLossless(embedding);
804
969
  *
805
- * // Later, recover the exact embedding
806
- * const recovered = decodeElidToEmbedding(elid);
807
- * // recovered is identical to embedding
970
+ * // 50% retention - good balance of size and fidelity
971
+ * const elid = encodeElidCompressed(embedding, 0.5);
972
+ *
973
+ * // 25% retention - smaller but less accurate
974
+ * const smallElid = encodeElidCompressed(embedding, 0.25);
808
975
  * ```
809
976
  * @param {Float64Array} embedding
977
+ * @param {number} retention_pct
810
978
  * @returns {string}
811
979
  */
812
- export function encodeElidLossless(embedding) {
980
+ export function encodeElidCompressed(embedding, retention_pct) {
813
981
  let deferred3_0;
814
982
  let deferred3_1;
815
983
  try {
816
984
  const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
817
985
  const len0 = WASM_VECTOR_LEN;
818
- const ret = wasm.encodeElidLossless(ptr0, len0);
986
+ const ret = wasm.encodeElidCompressed(ptr0, len0, retention_pct);
819
987
  var ptr2 = ret[0];
820
988
  var len2 = ret[1];
821
989
  if (ret[3]) {
@@ -831,87 +999,45 @@ export function encodeElidLossless(embedding) {
831
999
  }
832
1000
 
833
1001
  /**
834
- * Decode an ELID string back to an embedding vector.
1002
+ * Encode an embedding with a maximum output string length constraint.
835
1003
  *
836
- * Only works for ELIDs encoded with a FullVector profile (lossless,
837
- * compressed, or max_length). Returns null for non-reversible profiles
838
- * like Mini128, Morton, or Hilbert.
1004
+ * Calculates the optimal precision and dimension settings to fit within
1005
+ * the specified character limit while maximizing fidelity.
839
1006
  *
840
1007
  * # Parameters
841
1008
  *
842
- * - `elid_str`: A valid ELID string (base32hex encoded)
1009
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
1010
+ * - `max_chars`: Maximum output string length in characters
843
1011
  *
844
1012
  * # Returns
845
1013
  *
846
- * A Float64Array containing the decoded embedding, or null if the ELID
847
- * is not reversible.
848
- *
849
- * Note: If dimension reduction was used during encoding, the decoded
850
- * embedding will be in the reduced dimension space, not the original.
1014
+ * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
851
1015
  *
852
1016
  * # JavaScript Example
853
1017
  *
854
1018
  * ```javascript
855
- * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
1019
+ * import { encodeElidMaxLength } from 'elid';
856
1020
  *
857
1021
  * const embedding = new Float64Array(768).fill(0.1);
858
- * const elid = encodeElidLossless(embedding);
859
- *
860
- * if (isElidReversible(elid)) {
861
- * const recovered = decodeElidToEmbedding(elid);
862
- * console.log(recovered.length); // 768
863
- * }
864
- * ```
865
- * @param {string} elid_str
866
- * @returns {any}
867
- */
868
- export function decodeElidToEmbedding(elid_str) {
869
- const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
870
- const len0 = WASM_VECTOR_LEN;
871
- const ret = wasm.decodeElidToEmbedding(ptr0, len0);
872
- if (ret[2]) {
873
- throw takeFromExternrefTable0(ret[1]);
874
- }
875
- return takeFromExternrefTable0(ret[0]);
876
- }
877
-
878
- /**
879
- * Encode an embedding vector to an ELID string.
880
- *
881
- * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
882
- * sortable identifier. The ELID preserves locality properties for efficient
883
- * similarity search.
884
- *
885
- * # Parameters
886
- *
887
- * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
888
- * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
889
- *
890
- * # Returns
891
- *
892
- * A base32hex-encoded ELID string suitable for storage and comparison.
893
- *
894
- * # JavaScript Example
895
1022
  *
896
- * ```javascript
897
- * import { encodeElid, ElidProfile } from 'elid';
1023
+ * // Fit in 100 characters (e.g., for database column constraints)
1024
+ * const elid = encodeElidMaxLength(embedding, 100);
1025
+ * console.log(elid.length <= 100); // true
898
1026
  *
899
- * // OpenAI embeddings are 1536 dimensions
900
- * const embedding = await getEmbedding("Hello world");
901
- * const elid = encodeElid(embedding, ElidProfile.Mini128);
902
- * console.log(elid); // "012345abcdef..."
1027
+ * // Fit in 50 characters (more compression)
1028
+ * const shortElid = encodeElidMaxLength(embedding, 50);
903
1029
  * ```
904
1030
  * @param {Float64Array} embedding
905
- * @param {ElidProfile} profile
1031
+ * @param {number} max_chars
906
1032
  * @returns {string}
907
1033
  */
908
- export function encodeElid(embedding, profile) {
1034
+ export function encodeElidMaxLength(embedding, max_chars) {
909
1035
  let deferred3_0;
910
1036
  let deferred3_1;
911
1037
  try {
912
1038
  const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
913
1039
  const len0 = WASM_VECTOR_LEN;
914
- const ret = wasm.encodeElid(ptr0, len0, profile);
1040
+ const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
915
1041
  var ptr2 = ret[0];
916
1042
  var len2 = ret[1];
917
1043
  if (ret[3]) {
@@ -927,31 +1053,29 @@ export function encodeElid(embedding, profile) {
927
1053
  }
928
1054
 
929
1055
/**
 * Compute the Jaro-Winkler similarity between two strings.
 *
 * Returns a value between 0.0 (completely different) and 1.0 (identical).
 * Gives more favorable ratings to strings with common prefixes.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { jaroWinkler } from 'elid';
 *
 * const similarity = jaroWinkler("martha", "marhta");
 * console.log(similarity); // ~0.961
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function jaroWinkler(a, b) {
    // Each string is marshalled as a (pointer, length) pair; the length is
    // read from WASM_VECTOR_LEN right after the corresponding call.
    const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len0 = WASM_VECTOR_LEN;
    const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len1 = WASM_VECTOR_LEN;
    // The wasm result is returned as-is, with no integer coercion.
    const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
    return ret;
}
957
1081
 
@@ -985,130 +1109,6 @@ export function hamming(a, b) {
985
1109
  return ret === 0x100000001 ? undefined : ret;
986
1110
  }
987
1111
 
988
- /**
989
- * Compute the best matching similarity between two strings.
990
- *
991
- * Runs multiple algorithms and returns the highest score.
992
- *
993
- * # JavaScript Example
994
- *
995
- * ```javascript
996
- * import { bestMatch } from 'elid';
997
- *
998
- * const score = bestMatch("hello", "hallo");
999
- * console.log(score); // ~0.8
1000
- * ```
1001
- * @param {string} a
1002
- * @param {string} b
1003
- * @returns {number}
1004
- */
1005
- export function bestMatch(a, b) {
1006
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1007
- const len0 = WASM_VECTOR_LEN;
1008
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1009
- const len1 = WASM_VECTOR_LEN;
1010
- const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
1011
- return ret;
1012
- }
1013
-
1014
- /**
1015
- * Compute the Jaro-Winkler similarity between two strings.
1016
- *
1017
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
1018
- * Gives more favorable ratings to strings with common prefixes.
1019
- *
1020
- * # JavaScript Example
1021
- *
1022
- * ```javascript
1023
- * import { jaroWinkler } from 'elid';
1024
- *
1025
- * const similarity = jaroWinkler("martha", "marhta");
1026
- * console.log(similarity); // ~0.961
1027
- * ```
1028
- * @param {string} a
1029
- * @param {string} b
1030
- * @returns {number}
1031
- */
1032
- export function jaroWinkler(a, b) {
1033
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1034
- const len0 = WASM_VECTOR_LEN;
1035
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1036
- const len1 = WASM_VECTOR_LEN;
1037
- const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
1038
- return ret;
1039
- }
1040
-
1041
- /**
1042
- * Compute the normalized Levenshtein similarity between two strings.
1043
- *
1044
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
1045
- *
1046
- * # JavaScript Example
1047
- *
1048
- * ```javascript
1049
- * import { normalizedLevenshtein } from 'elid';
1050
- *
1051
- * const similarity = normalizedLevenshtein("hello", "hallo");
1052
- * console.log(similarity); // ~0.8
1053
- * ```
1054
- * @param {string} a
1055
- * @param {string} b
1056
- * @returns {number}
1057
- */
1058
- export function normalizedLevenshtein(a, b) {
1059
- const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1060
- const len0 = WASM_VECTOR_LEN;
1061
- const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
1062
- const len1 = WASM_VECTOR_LEN;
1063
- const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
1064
- return ret;
1065
- }
1066
-
1067
- let cachedUint32ArrayMemory0 = null;
1068
-
1069
- function getUint32ArrayMemory0() {
1070
- if (cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0) {
1071
- cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
1072
- }
1073
- return cachedUint32ArrayMemory0;
1074
- }
1075
-
1076
- function getArrayU32FromWasm0(ptr, len) {
1077
- ptr = ptr >>> 0;
1078
- return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
1079
- }
1080
- /**
1081
- * Find all hashes within a given distance threshold.
1082
- *
1083
- * Useful for database queries - pre-compute hashes, then find similar ones.
1084
- *
1085
- * # JavaScript Example
1086
- *
1087
- * ```javascript
1088
- * import { simhash, findSimilarHashes } from 'elid';
1089
- *
1090
- * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
1091
- * const hashes = candidates.map(s => simhash(s));
1092
- *
1093
- * const queryHash = simhash("iPhone 14");
1094
- * const matches = findSimilarHashes(queryHash, hashes, 10);
1095
- *
1096
- * console.log(matches); // [0, 1] - indices of similar items
1097
- * ```
1098
- * @param {number} query_hash
1099
- * @param {Float64Array} candidate_hashes
1100
- * @param {number} max_distance
1101
- * @returns {Uint32Array}
1102
- */
1103
- export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
1104
- const ptr0 = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
1105
- const len0 = WASM_VECTOR_LEN;
1106
- const ret = wasm.findSimilarHashes(query_hash, ptr0, len0, max_distance);
1107
- var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
1108
- wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
1109
- return v2;
1110
- }
1111
-
1112
1112
  /**
1113
1113
  * Dimension handling mode for full vector encoding.
1114
1114
  *