elid 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -8
- package/elid.d.ts +485 -83
- package/elid_bg.js +767 -169
- package/elid_bg.wasm +0 -0
- package/package.json +7 -7
package/elid_bg.js
CHANGED
|
@@ -180,6 +180,273 @@ function handleError(f, args) {
|
|
|
180
180
|
wasm.__wbindgen_exn_store(idx);
|
|
181
181
|
}
|
|
182
182
|
}
|
|
183
|
+
/**
|
|
184
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
185
|
+
*
|
|
186
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
187
|
+
*
|
|
188
|
+
* # JavaScript Example
|
|
189
|
+
*
|
|
190
|
+
* ```javascript
|
|
191
|
+
* import { osaDistance } from 'elid';
|
|
192
|
+
*
|
|
193
|
+
* const distance = osaDistance("ca", "ac");
|
|
194
|
+
* console.log(distance); // 1 (transposition)
|
|
195
|
+
* ```
|
|
196
|
+
* @param {string} a
|
|
197
|
+
* @param {string} b
|
|
198
|
+
* @returns {number}
|
|
199
|
+
*/
|
|
200
|
+
export function osaDistance(a, b) {
|
|
201
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
202
|
+
const len0 = WASM_VECTOR_LEN;
|
|
203
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
204
|
+
const len1 = WASM_VECTOR_LEN;
|
|
205
|
+
const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
|
|
206
|
+
return ret >>> 0;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
* Compute the Levenshtein distance between two strings.
|
|
211
|
+
*
|
|
212
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
213
|
+
*
|
|
214
|
+
* # JavaScript Example
|
|
215
|
+
*
|
|
216
|
+
* ```javascript
|
|
217
|
+
* import { levenshtein } from 'elid';
|
|
218
|
+
*
|
|
219
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
220
|
+
* console.log(distance); // 3
|
|
221
|
+
* ```
|
|
222
|
+
* @param {string} a
|
|
223
|
+
* @param {string} b
|
|
224
|
+
* @returns {number}
|
|
225
|
+
*/
|
|
226
|
+
export function levenshtein(a, b) {
|
|
227
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
228
|
+
const len0 = WASM_VECTOR_LEN;
|
|
229
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
230
|
+
const len1 = WASM_VECTOR_LEN;
|
|
231
|
+
const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
|
|
232
|
+
return ret >>> 0;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
function passArrayJsValueToWasm0(array, malloc) {
|
|
236
|
+
const ptr = malloc(array.length * 4, 4) >>> 0;
|
|
237
|
+
for (let i = 0; i < array.length; i++) {
|
|
238
|
+
const add = addToExternrefTable0(array[i]);
|
|
239
|
+
getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
|
|
240
|
+
}
|
|
241
|
+
WASM_VECTOR_LEN = array.length;
|
|
242
|
+
return ptr;
|
|
243
|
+
}
|
|
244
|
+
/**
|
|
245
|
+
* Find all matches above a threshold score.
|
|
246
|
+
*
|
|
247
|
+
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
248
|
+
*
|
|
249
|
+
* # JavaScript Example
|
|
250
|
+
*
|
|
251
|
+
* ```javascript
|
|
252
|
+
* import { findMatchesAboveThreshold } from 'elid';
|
|
253
|
+
*
|
|
254
|
+
* const candidates = ["apple", "application", "apply", "banana"];
|
|
255
|
+
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
256
|
+
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
257
|
+
* ```
|
|
258
|
+
* @param {string} query
|
|
259
|
+
* @param {string[]} candidates
|
|
260
|
+
* @param {number} threshold
|
|
261
|
+
* @returns {any}
|
|
262
|
+
*/
|
|
263
|
+
export function findMatchesAboveThreshold(query, candidates, threshold) {
|
|
264
|
+
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
265
|
+
const len0 = WASM_VECTOR_LEN;
|
|
266
|
+
const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
|
|
267
|
+
const len1 = WASM_VECTOR_LEN;
|
|
268
|
+
const ret = wasm.findMatchesAboveThreshold(ptr0, len0, ptr1, len1, threshold);
|
|
269
|
+
return ret;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Compute the Hamming distance between two SimHash values.
|
|
274
|
+
*
|
|
275
|
+
* Returns the number of differing bits. Lower values = higher similarity.
|
|
276
|
+
*
|
|
277
|
+
* # JavaScript Example
|
|
278
|
+
*
|
|
279
|
+
* ```javascript
|
|
280
|
+
* import { simhash, simhashDistance } from 'elid';
|
|
281
|
+
*
|
|
282
|
+
* const hash1 = simhash("iPhone 14");
|
|
283
|
+
* const hash2 = simhash("iPhone 15");
|
|
284
|
+
* const distance = simhashDistance(hash1, hash2);
|
|
285
|
+
*
|
|
286
|
+
* console.log(distance); // Low number = similar
|
|
287
|
+
* ```
|
|
288
|
+
* @param {number} hash1
|
|
289
|
+
* @param {number} hash2
|
|
290
|
+
* @returns {number}
|
|
291
|
+
*/
|
|
292
|
+
export function simhashDistance(hash1, hash2) {
|
|
293
|
+
const ret = wasm.simhashDistance(hash1, hash2);
|
|
294
|
+
return ret >>> 0;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
let cachedFloat64ArrayMemory0 = null;
|
|
298
|
+
|
|
299
|
+
function getFloat64ArrayMemory0() {
|
|
300
|
+
if (cachedFloat64ArrayMemory0 === null || cachedFloat64ArrayMemory0.byteLength === 0) {
|
|
301
|
+
cachedFloat64ArrayMemory0 = new Float64Array(wasm.memory.buffer);
|
|
302
|
+
}
|
|
303
|
+
return cachedFloat64ArrayMemory0;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
function passArrayF64ToWasm0(arg, malloc) {
|
|
307
|
+
const ptr = malloc(arg.length * 8, 8) >>> 0;
|
|
308
|
+
getFloat64ArrayMemory0().set(arg, ptr / 8);
|
|
309
|
+
WASM_VECTOR_LEN = arg.length;
|
|
310
|
+
return ptr;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
function takeFromExternrefTable0(idx) {
|
|
314
|
+
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
315
|
+
wasm.__externref_table_dealloc(idx);
|
|
316
|
+
return value;
|
|
317
|
+
}
|
|
318
|
+
/**
|
|
319
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
320
|
+
*
|
|
321
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
322
|
+
* the specified character limit while maximizing fidelity.
|
|
323
|
+
*
|
|
324
|
+
* # Parameters
|
|
325
|
+
*
|
|
326
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
327
|
+
* - `max_chars`: Maximum output string length in characters
|
|
328
|
+
*
|
|
329
|
+
* # Returns
|
|
330
|
+
*
|
|
331
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
332
|
+
*
|
|
333
|
+
* # JavaScript Example
|
|
334
|
+
*
|
|
335
|
+
* ```javascript
|
|
336
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
337
|
+
*
|
|
338
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
339
|
+
*
|
|
340
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
341
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
342
|
+
* console.log(elid.length <= 100); // true
|
|
343
|
+
*
|
|
344
|
+
* // Fit in 50 characters (more compression)
|
|
345
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
346
|
+
* ```
|
|
347
|
+
* @param {Float64Array} embedding
|
|
348
|
+
* @param {number} max_chars
|
|
349
|
+
* @returns {string}
|
|
350
|
+
*/
|
|
351
|
+
export function encodeElidMaxLength(embedding, max_chars) {
|
|
352
|
+
let deferred3_0;
|
|
353
|
+
let deferred3_1;
|
|
354
|
+
try {
|
|
355
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
356
|
+
const len0 = WASM_VECTOR_LEN;
|
|
357
|
+
const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
|
|
358
|
+
var ptr2 = ret[0];
|
|
359
|
+
var len2 = ret[1];
|
|
360
|
+
if (ret[3]) {
|
|
361
|
+
ptr2 = 0; len2 = 0;
|
|
362
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
363
|
+
}
|
|
364
|
+
deferred3_0 = ptr2;
|
|
365
|
+
deferred3_1 = len2;
|
|
366
|
+
return getStringFromWasm0(ptr2, len2);
|
|
367
|
+
} finally {
|
|
368
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
function _assertClass(instance, klass) {
|
|
373
|
+
if (!(instance instanceof klass)) {
|
|
374
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* Compute Levenshtein distance with custom options.
|
|
379
|
+
*
|
|
380
|
+
* # JavaScript Example
|
|
381
|
+
*
|
|
382
|
+
* ```javascript
|
|
383
|
+
* import { levenshteinWithOpts, SimilarityOptions } from 'elid';
|
|
384
|
+
*
|
|
385
|
+
* const opts = new SimilarityOptions();
|
|
386
|
+
* opts.setCaseSensitive(false);
|
|
387
|
+
* opts.setTrimWhitespace(true);
|
|
388
|
+
*
|
|
389
|
+
* const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
|
|
390
|
+
* console.log(distance); // 0
|
|
391
|
+
* ```
|
|
392
|
+
* @param {string} a
|
|
393
|
+
* @param {string} b
|
|
394
|
+
* @param {SimilarityOptions} opts
|
|
395
|
+
* @returns {number}
|
|
396
|
+
*/
|
|
397
|
+
export function levenshteinWithOpts(a, b, opts) {
|
|
398
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
399
|
+
const len0 = WASM_VECTOR_LEN;
|
|
400
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
401
|
+
const len1 = WASM_VECTOR_LEN;
|
|
402
|
+
_assertClass(opts, SimilarityOptions);
|
|
403
|
+
var ptr2 = opts.__destroy_into_raw();
|
|
404
|
+
const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
|
|
405
|
+
return ret >>> 0;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
410
|
+
*
|
|
411
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
412
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
413
|
+
*
|
|
414
|
+
* # Parameters
|
|
415
|
+
*
|
|
416
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
417
|
+
*
|
|
418
|
+
* # Returns
|
|
419
|
+
*
|
|
420
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
421
|
+
*
|
|
422
|
+
* # JavaScript Example
|
|
423
|
+
*
|
|
424
|
+
* ```javascript
|
|
425
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
426
|
+
*
|
|
427
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
428
|
+
*
|
|
429
|
+
* // Mini128 is NOT reversible
|
|
430
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
431
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
432
|
+
*
|
|
433
|
+
* // Lossless IS reversible
|
|
434
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
435
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
436
|
+
* ```
|
|
437
|
+
* @param {string} elid_str
|
|
438
|
+
* @returns {boolean}
|
|
439
|
+
*/
|
|
440
|
+
export function isElidReversible(elid_str) {
|
|
441
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
442
|
+
const len0 = WASM_VECTOR_LEN;
|
|
443
|
+
const ret = wasm.isElidReversible(ptr0, len0);
|
|
444
|
+
if (ret[2]) {
|
|
445
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
446
|
+
}
|
|
447
|
+
return ret[0] !== 0;
|
|
448
|
+
}
|
|
449
|
+
|
|
183
450
|
/**
|
|
184
451
|
* Compute the SimHash fingerprint of a string.
|
|
185
452
|
*
|
|
@@ -211,173 +478,481 @@ export function simhash(text) {
|
|
|
211
478
|
return ret;
|
|
212
479
|
}
|
|
213
480
|
|
|
481
|
+
function getArrayU8FromWasm0(ptr, len) {
|
|
482
|
+
ptr = ptr >>> 0;
|
|
483
|
+
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
484
|
+
}
|
|
214
485
|
/**
|
|
215
|
-
*
|
|
486
|
+
* Decode an ELID string to raw bytes.
|
|
216
487
|
*
|
|
217
|
-
* Returns
|
|
488
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
489
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
490
|
+
*
|
|
491
|
+
* # Parameters
|
|
492
|
+
*
|
|
493
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
494
|
+
*
|
|
495
|
+
* # Returns
|
|
496
|
+
*
|
|
497
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
218
498
|
*
|
|
219
499
|
* # JavaScript Example
|
|
220
500
|
*
|
|
221
501
|
* ```javascript
|
|
222
|
-
* import {
|
|
502
|
+
* import { decodeElid } from 'elid';
|
|
223
503
|
*
|
|
224
|
-
* const
|
|
225
|
-
* console.log(
|
|
504
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
505
|
+
* console.log(bytes); // Uint8Array [...]
|
|
226
506
|
* ```
|
|
227
|
-
* @param {string}
|
|
228
|
-
* @
|
|
507
|
+
* @param {string} elid_str
|
|
508
|
+
* @returns {Uint8Array}
|
|
509
|
+
*/
|
|
510
|
+
export function decodeElid(elid_str) {
|
|
511
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
512
|
+
const len0 = WASM_VECTOR_LEN;
|
|
513
|
+
const ret = wasm.decodeElid(ptr0, len0);
|
|
514
|
+
if (ret[3]) {
|
|
515
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
516
|
+
}
|
|
517
|
+
var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
|
|
518
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
519
|
+
return v2;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
/**
|
|
523
|
+
* Get metadata about a FullVector ELID.
|
|
524
|
+
*
|
|
525
|
+
* Returns an object containing information about how the ELID was encoded,
|
|
526
|
+
* including original dimensions, precision, and dimension mode.
|
|
527
|
+
*
|
|
528
|
+
* # Parameters
|
|
529
|
+
*
|
|
530
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
531
|
+
*
|
|
532
|
+
* # Returns
|
|
533
|
+
*
|
|
534
|
+
* An object with metadata fields, or null if not a FullVector ELID.
|
|
535
|
+
*
|
|
536
|
+
* # JavaScript Example
|
|
537
|
+
*
|
|
538
|
+
* ```javascript
|
|
539
|
+
* import { encodeElidCompressed, getElidMetadata } from 'elid';
|
|
540
|
+
*
|
|
541
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
542
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
543
|
+
*
|
|
544
|
+
* const meta = getElidMetadata(elid);
|
|
545
|
+
* if (meta) {
|
|
546
|
+
* console.log(meta.originalDims); // 768
|
|
547
|
+
* console.log(meta.encodedDims); // depends on compression
|
|
548
|
+
* console.log(meta.isLossless); // false
|
|
549
|
+
* }
|
|
550
|
+
* ```
|
|
551
|
+
* @param {string} elid_str
|
|
552
|
+
* @returns {any}
|
|
553
|
+
*/
|
|
554
|
+
export function getElidMetadata(elid_str) {
|
|
555
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
556
|
+
const len0 = WASM_VECTOR_LEN;
|
|
557
|
+
const ret = wasm.getElidMetadata(ptr0, len0);
|
|
558
|
+
if (ret[2]) {
|
|
559
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
560
|
+
}
|
|
561
|
+
return takeFromExternrefTable0(ret[0]);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
/**
|
|
565
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
566
|
+
*
|
|
567
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
568
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
569
|
+
*
|
|
570
|
+
* # Parameters
|
|
571
|
+
*
|
|
572
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
573
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
574
|
+
*
|
|
575
|
+
* # Returns
|
|
576
|
+
*
|
|
577
|
+
* A base32hex-encoded ELID string.
|
|
578
|
+
*
|
|
579
|
+
* # JavaScript Example
|
|
580
|
+
*
|
|
581
|
+
* ```javascript
|
|
582
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
583
|
+
*
|
|
584
|
+
* // Different sized embeddings from different models
|
|
585
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
586
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
587
|
+
*
|
|
588
|
+
* // Project both to 128-dim common space
|
|
589
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
590
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
591
|
+
*
|
|
592
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
593
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
594
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
595
|
+
* // Both have length 128
|
|
596
|
+
* ```
|
|
597
|
+
* @param {Float64Array} embedding
|
|
598
|
+
* @param {number} common_dims
|
|
599
|
+
* @returns {string}
|
|
600
|
+
*/
|
|
601
|
+
export function encodeElidCrossDimensional(embedding, common_dims) {
|
|
602
|
+
let deferred3_0;
|
|
603
|
+
let deferred3_1;
|
|
604
|
+
try {
|
|
605
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
606
|
+
const len0 = WASM_VECTOR_LEN;
|
|
607
|
+
const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
|
|
608
|
+
var ptr2 = ret[0];
|
|
609
|
+
var len2 = ret[1];
|
|
610
|
+
if (ret[3]) {
|
|
611
|
+
ptr2 = 0; len2 = 0;
|
|
612
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
613
|
+
}
|
|
614
|
+
deferred3_0 = ptr2;
|
|
615
|
+
deferred3_1 = len2;
|
|
616
|
+
return getStringFromWasm0(ptr2, len2);
|
|
617
|
+
} finally {
|
|
618
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
/**
|
|
623
|
+
* Compute the Hamming distance between two ELID strings.
|
|
624
|
+
*
|
|
625
|
+
* Returns the number of differing bits between two Mini128 ELIDs.
|
|
626
|
+
* This distance is proportional to the angular distance between the
|
|
627
|
+
* original embeddings (lower = more similar).
|
|
628
|
+
*
|
|
629
|
+
* # Requirements
|
|
630
|
+
*
|
|
631
|
+
* Both ELIDs must use the Mini128 profile.
|
|
632
|
+
*
|
|
633
|
+
* # Parameters
|
|
634
|
+
*
|
|
635
|
+
* - `elid1`: First ELID string
|
|
636
|
+
* - `elid2`: Second ELID string
|
|
637
|
+
*
|
|
638
|
+
* # Returns
|
|
639
|
+
*
|
|
640
|
+
* Hamming distance (0-128). 0 means all 128 bits match; 128 means every bit differs.
|
|
641
|
+
*
|
|
642
|
+
* # JavaScript Example
|
|
643
|
+
*
|
|
644
|
+
* ```javascript
|
|
645
|
+
* import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
|
|
646
|
+
*
|
|
647
|
+
* const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
|
|
648
|
+
* const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
|
|
649
|
+
*
|
|
650
|
+
* const distance = elidHammingDistance(elid1, elid2);
|
|
651
|
+
* if (distance < 20) {
|
|
652
|
+
* console.log("Very similar embeddings!");
|
|
653
|
+
* }
|
|
654
|
+
* ```
|
|
655
|
+
* @param {string} elid1
|
|
656
|
+
* @param {string} elid2
|
|
229
657
|
* @returns {number}
|
|
230
658
|
*/
|
|
231
|
-
export function
|
|
232
|
-
const ptr0 = passStringToWasm0(
|
|
659
|
+
export function elidHammingDistance(elid1, elid2) {
|
|
660
|
+
const ptr0 = passStringToWasm0(elid1, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
233
661
|
const len0 = WASM_VECTOR_LEN;
|
|
234
|
-
const ptr1 = passStringToWasm0(
|
|
662
|
+
const ptr1 = passStringToWasm0(elid2, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
235
663
|
const len1 = WASM_VECTOR_LEN;
|
|
236
|
-
const ret = wasm.
|
|
237
|
-
|
|
664
|
+
const ret = wasm.elidHammingDistance(ptr0, len0, ptr1, len1);
|
|
665
|
+
if (ret[2]) {
|
|
666
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
667
|
+
}
|
|
668
|
+
return ret[0] >>> 0;
|
|
238
669
|
}
|
|
239
670
|
|
|
240
671
|
/**
|
|
241
|
-
*
|
|
672
|
+
* Find the best match for a query string in an array of candidates.
|
|
242
673
|
*
|
|
243
|
-
*
|
|
674
|
+
* Returns an object with the index and similarity score of the best match.
|
|
244
675
|
*
|
|
245
676
|
* # JavaScript Example
|
|
246
677
|
*
|
|
247
678
|
* ```javascript
|
|
248
|
-
* import {
|
|
679
|
+
* import { findBestMatch } from 'elid';
|
|
249
680
|
*
|
|
250
|
-
* const
|
|
251
|
-
*
|
|
681
|
+
* const candidates = ["apple", "application", "apply"];
|
|
682
|
+
* const result = findBestMatch("app", candidates);
|
|
683
|
+
* console.log(result); // { index: 0, score: 0.907 }
|
|
252
684
|
* ```
|
|
253
|
-
* @param {string}
|
|
254
|
-
* @param {string}
|
|
255
|
-
* @returns {
|
|
685
|
+
* @param {string} query
|
|
686
|
+
* @param {string[]} candidates
|
|
687
|
+
* @returns {object}
|
|
256
688
|
*/
|
|
257
|
-
export function
|
|
258
|
-
const ptr0 = passStringToWasm0(
|
|
689
|
+
export function findBestMatch(query, candidates) {
|
|
690
|
+
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
259
691
|
const len0 = WASM_VECTOR_LEN;
|
|
260
|
-
const ptr1 =
|
|
692
|
+
const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
|
|
261
693
|
const len1 = WASM_VECTOR_LEN;
|
|
262
|
-
const ret = wasm.
|
|
694
|
+
const ret = wasm.findBestMatch(ptr0, len0, ptr1, len1);
|
|
263
695
|
return ret;
|
|
264
696
|
}
|
|
265
697
|
|
|
266
698
|
/**
|
|
267
|
-
*
|
|
699
|
+
* Encode an embedding with percentage-based compression.
|
|
700
|
+
*
|
|
701
|
+
* The retention value (a fraction from 0.0 to 1.0, not 0-100) controls how much information is preserved:
|
|
702
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
703
|
+
* - 0.5 = half precision and/or half dimensions
|
|
704
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
705
|
+
*
|
|
706
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
707
|
+
* more geometric relationships) before reducing precision.
|
|
708
|
+
*
|
|
709
|
+
* # Parameters
|
|
710
|
+
*
|
|
711
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
712
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
713
|
+
*
|
|
714
|
+
* # Returns
|
|
715
|
+
*
|
|
716
|
+
* A base32hex-encoded ELID string.
|
|
717
|
+
*
|
|
718
|
+
* # JavaScript Example
|
|
719
|
+
*
|
|
720
|
+
* ```javascript
|
|
721
|
+
* import { encodeElidCompressed } from 'elid';
|
|
722
|
+
*
|
|
723
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
724
|
+
*
|
|
725
|
+
* // 50% retention - good balance of size and fidelity
|
|
726
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
727
|
+
*
|
|
728
|
+
* // 25% retention - smaller but less accurate
|
|
729
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
730
|
+
* ```
|
|
731
|
+
* @param {Float64Array} embedding
|
|
732
|
+
* @param {number} retention_pct
|
|
733
|
+
* @returns {string}
|
|
734
|
+
*/
|
|
735
|
+
export function encodeElidCompressed(embedding, retention_pct) {
|
|
736
|
+
let deferred3_0;
|
|
737
|
+
let deferred3_1;
|
|
738
|
+
try {
|
|
739
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
740
|
+
const len0 = WASM_VECTOR_LEN;
|
|
741
|
+
const ret = wasm.encodeElidCompressed(ptr0, len0, retention_pct);
|
|
742
|
+
var ptr2 = ret[0];
|
|
743
|
+
var len2 = ret[1];
|
|
744
|
+
if (ret[3]) {
|
|
745
|
+
ptr2 = 0; len2 = 0;
|
|
746
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
747
|
+
}
|
|
748
|
+
deferred3_0 = ptr2;
|
|
749
|
+
deferred3_1 = len2;
|
|
750
|
+
return getStringFromWasm0(ptr2, len2);
|
|
751
|
+
} finally {
|
|
752
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
/**
|
|
757
|
+
* Compute the Jaro similarity between two strings.
|
|
268
758
|
*
|
|
269
759
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
270
|
-
*
|
|
760
|
+
* Particularly effective for short strings like names.
|
|
271
761
|
*
|
|
272
762
|
* # JavaScript Example
|
|
273
763
|
*
|
|
274
764
|
* ```javascript
|
|
275
|
-
* import {
|
|
765
|
+
* import { jaro } from 'elid';
|
|
276
766
|
*
|
|
277
|
-
* const similarity =
|
|
278
|
-
* console.log(similarity); // ~0.
|
|
767
|
+
* const similarity = jaro("martha", "marhta");
|
|
768
|
+
* console.log(similarity); // ~0.944
|
|
279
769
|
* ```
|
|
280
770
|
* @param {string} a
|
|
281
771
|
* @param {string} b
|
|
282
772
|
* @returns {number}
|
|
283
773
|
*/
|
|
284
|
-
export function
|
|
774
|
+
export function jaro(a, b) {
|
|
285
775
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
286
776
|
const len0 = WASM_VECTOR_LEN;
|
|
287
777
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
288
778
|
const len1 = WASM_VECTOR_LEN;
|
|
289
|
-
const ret = wasm.
|
|
779
|
+
const ret = wasm.jaro(ptr0, len0, ptr1, len1);
|
|
290
780
|
return ret;
|
|
291
781
|
}
|
|
292
782
|
|
|
293
783
|
/**
|
|
294
|
-
*
|
|
784
|
+
* Encode an embedding using lossless full vector encoding.
|
|
295
785
|
*
|
|
296
|
-
*
|
|
786
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
787
|
+
* This produces the largest output but allows exact reconstruction.
|
|
788
|
+
*
|
|
789
|
+
* # Parameters
|
|
790
|
+
*
|
|
791
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
792
|
+
*
|
|
793
|
+
* # Returns
|
|
794
|
+
*
|
|
795
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
297
796
|
*
|
|
298
797
|
* # JavaScript Example
|
|
299
798
|
*
|
|
300
799
|
* ```javascript
|
|
301
|
-
* import {
|
|
800
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
302
801
|
*
|
|
303
|
-
* const
|
|
304
|
-
* const
|
|
305
|
-
* const distance = simhashDistance(hash1, hash2);
|
|
802
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
803
|
+
* const elid = encodeElidLossless(embedding);
|
|
306
804
|
*
|
|
307
|
-
*
|
|
805
|
+
* // Later, recover the exact embedding
|
|
806
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
807
|
+
* // recovered matches embedding to 32-bit float precision (e.g. 0.1 comes back as Math.fround(0.1))
|
|
308
808
|
* ```
|
|
309
|
-
* @param {
|
|
310
|
-
* @
|
|
311
|
-
* @returns {number}
|
|
809
|
+
* @param {Float64Array} embedding
|
|
810
|
+
* @returns {string}
|
|
312
811
|
*/
|
|
313
|
-
export function
|
|
314
|
-
|
|
315
|
-
|
|
812
|
+
export function encodeElidLossless(embedding) {
|
|
813
|
+
let deferred3_0;
|
|
814
|
+
let deferred3_1;
|
|
815
|
+
try {
|
|
816
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
817
|
+
const len0 = WASM_VECTOR_LEN;
|
|
818
|
+
const ret = wasm.encodeElidLossless(ptr0, len0);
|
|
819
|
+
var ptr2 = ret[0];
|
|
820
|
+
var len2 = ret[1];
|
|
821
|
+
if (ret[3]) {
|
|
822
|
+
ptr2 = 0; len2 = 0;
|
|
823
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
824
|
+
}
|
|
825
|
+
deferred3_0 = ptr2;
|
|
826
|
+
deferred3_1 = len2;
|
|
827
|
+
return getStringFromWasm0(ptr2, len2);
|
|
828
|
+
} finally {
|
|
829
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
830
|
+
}
|
|
316
831
|
}
|
|
317
832
|
|
|
318
833
|
/**
|
|
319
|
-
*
|
|
834
|
+
* Decode an ELID string back to an embedding vector.
|
|
320
835
|
*
|
|
321
|
-
*
|
|
836
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
837
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
838
|
+
* like Mini128, Morton, or Hilbert.
|
|
839
|
+
*
|
|
840
|
+
* # Parameters
|
|
841
|
+
*
|
|
842
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
843
|
+
*
|
|
844
|
+
* # Returns
|
|
845
|
+
*
|
|
846
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
847
|
+
* is not reversible.
|
|
848
|
+
*
|
|
849
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
850
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
322
851
|
*
|
|
323
852
|
* # JavaScript Example
|
|
324
853
|
*
|
|
325
854
|
* ```javascript
|
|
326
|
-
* import {
|
|
855
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
327
856
|
*
|
|
328
|
-
* const
|
|
329
|
-
*
|
|
857
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
858
|
+
* const elid = encodeElidLossless(embedding);
|
|
330
859
|
*
|
|
331
|
-
*
|
|
332
|
-
*
|
|
860
|
+
* if (isElidReversible(elid)) {
|
|
861
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
862
|
+
* console.log(recovered.length); // 768
|
|
863
|
+
* }
|
|
333
864
|
* ```
|
|
334
|
-
* @param {string}
|
|
335
|
-
* @
|
|
336
|
-
* @returns {number}
|
|
865
|
+
* @param {string} elid_str
|
|
866
|
+
* @returns {any}
|
|
337
867
|
*/
|
|
338
|
-
export function
|
|
339
|
-
const ptr0 = passStringToWasm0(
|
|
868
|
+
export function decodeElidToEmbedding(elid_str) {
|
|
869
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
340
870
|
const len0 = WASM_VECTOR_LEN;
|
|
341
|
-
const
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
871
|
+
const ret = wasm.decodeElidToEmbedding(ptr0, len0);
|
|
872
|
+
if (ret[2]) {
|
|
873
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
874
|
+
}
|
|
875
|
+
return takeFromExternrefTable0(ret[0]);
|
|
345
876
|
}
|
|
346
877
|
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
878
|
+
/**
|
|
879
|
+
* Encode an embedding vector to an ELID string.
|
|
880
|
+
*
|
|
881
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
882
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
883
|
+
* similarity search.
|
|
884
|
+
*
|
|
885
|
+
* # Parameters
|
|
886
|
+
*
|
|
887
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
888
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
889
|
+
*
|
|
890
|
+
* # Returns
|
|
891
|
+
*
|
|
892
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
893
|
+
*
|
|
894
|
+
* # JavaScript Example
|
|
895
|
+
*
|
|
896
|
+
* ```javascript
|
|
897
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
898
|
+
*
|
|
899
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
900
|
+
* const embedding = await getEmbedding("Hello world");
|
|
901
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
902
|
+
* console.log(elid); // "012345abcdef..."
|
|
903
|
+
* ```
|
|
904
|
+
* @param {Float64Array} embedding
|
|
905
|
+
* @param {ElidProfile} profile
|
|
906
|
+
* @returns {string}
|
|
907
|
+
*/
|
|
908
|
+
export function encodeElid(embedding, profile) {
|
|
909
|
+
let deferred3_0;
|
|
910
|
+
let deferred3_1;
|
|
911
|
+
try {
|
|
912
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
913
|
+
const len0 = WASM_VECTOR_LEN;
|
|
914
|
+
const ret = wasm.encodeElid(ptr0, len0, profile);
|
|
915
|
+
var ptr2 = ret[0];
|
|
916
|
+
var len2 = ret[1];
|
|
917
|
+
if (ret[3]) {
|
|
918
|
+
ptr2 = 0; len2 = 0;
|
|
919
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
920
|
+
}
|
|
921
|
+
deferred3_0 = ptr2;
|
|
922
|
+
deferred3_1 = len2;
|
|
923
|
+
return getStringFromWasm0(ptr2, len2);
|
|
924
|
+
} finally {
|
|
925
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
350
926
|
}
|
|
351
927
|
}
|
|
928
|
+
|
|
352
929
|
/**
|
|
353
|
-
* Compute
|
|
930
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
931
|
+
*
|
|
932
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
354
933
|
*
|
|
355
934
|
* # JavaScript Example
|
|
356
935
|
*
|
|
357
936
|
* ```javascript
|
|
358
|
-
* import {
|
|
937
|
+
* import { simhashSimilarity } from 'elid';
|
|
359
938
|
*
|
|
360
|
-
* const
|
|
361
|
-
*
|
|
362
|
-
* opts.setTrimWhitespace(true);
|
|
939
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
940
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
363
941
|
*
|
|
364
|
-
* const
|
|
365
|
-
* console.log(
|
|
942
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
943
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
366
944
|
* ```
|
|
367
945
|
* @param {string} a
|
|
368
946
|
* @param {string} b
|
|
369
|
-
* @param {SimilarityOptions} opts
|
|
370
947
|
* @returns {number}
|
|
371
948
|
*/
|
|
372
|
-
export function
|
|
949
|
+
export function simhashSimilarity(a, b) {
|
|
373
950
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
374
951
|
const len0 = WASM_VECTOR_LEN;
|
|
375
952
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
376
953
|
const len1 = WASM_VECTOR_LEN;
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
|
|
380
|
-
return ret >>> 0;
|
|
954
|
+
const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
|
|
955
|
+
return ret;
|
|
381
956
|
}
|
|
382
957
|
|
|
383
958
|
/**
|
|
@@ -411,137 +986,84 @@ export function hamming(a, b) {
|
|
|
411
986
|
}
|
|
412
987
|
|
|
413
988
|
/**
|
|
414
|
-
* Compute the
|
|
989
|
+
* Compute the best matching similarity between two strings.
|
|
415
990
|
*
|
|
416
|
-
*
|
|
417
|
-
* Particularly effective for short strings like names.
|
|
991
|
+
* Runs multiple algorithms and returns the highest score.
|
|
418
992
|
*
|
|
419
993
|
* # JavaScript Example
|
|
420
994
|
*
|
|
421
995
|
* ```javascript
|
|
422
|
-
* import {
|
|
996
|
+
* import { bestMatch } from 'elid';
|
|
423
997
|
*
|
|
424
|
-
* const
|
|
425
|
-
* console.log(
|
|
998
|
+
* const score = bestMatch("hello", "hallo");
|
|
999
|
+
* console.log(score); // ~0.8
|
|
426
1000
|
* ```
|
|
427
1001
|
* @param {string} a
|
|
428
1002
|
* @param {string} b
|
|
429
1003
|
* @returns {number}
|
|
430
1004
|
*/
|
|
431
|
-
export function
|
|
1005
|
+
export function bestMatch(a, b) {
|
|
432
1006
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
433
1007
|
const len0 = WASM_VECTOR_LEN;
|
|
434
1008
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
435
1009
|
const len1 = WASM_VECTOR_LEN;
|
|
436
|
-
const ret = wasm.
|
|
1010
|
+
const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
|
|
437
1011
|
return ret;
|
|
438
1012
|
}
|
|
439
1013
|
|
|
440
1014
|
/**
|
|
441
|
-
* Compute the
|
|
1015
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
442
1016
|
*
|
|
443
|
-
* Returns
|
|
1017
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
1018
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
444
1019
|
*
|
|
445
1020
|
* # JavaScript Example
|
|
446
1021
|
*
|
|
447
1022
|
* ```javascript
|
|
448
|
-
* import {
|
|
1023
|
+
* import { jaroWinkler } from 'elid';
|
|
449
1024
|
*
|
|
450
|
-
* const
|
|
451
|
-
* console.log(
|
|
1025
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
1026
|
+
* console.log(similarity); // ~0.961
|
|
452
1027
|
* ```
|
|
453
1028
|
* @param {string} a
|
|
454
1029
|
* @param {string} b
|
|
455
1030
|
* @returns {number}
|
|
456
1031
|
*/
|
|
457
|
-
export function
|
|
1032
|
+
export function jaroWinkler(a, b) {
|
|
458
1033
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
459
1034
|
const len0 = WASM_VECTOR_LEN;
|
|
460
1035
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
461
1036
|
const len1 = WASM_VECTOR_LEN;
|
|
462
|
-
const ret = wasm.
|
|
463
|
-
return ret
|
|
1037
|
+
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
1038
|
+
return ret;
|
|
464
1039
|
}
|
|
465
1040
|
|
|
466
1041
|
/**
|
|
467
|
-
* Compute the
|
|
1042
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
468
1043
|
*
|
|
469
|
-
*
|
|
1044
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
470
1045
|
*
|
|
471
1046
|
* # JavaScript Example
|
|
472
1047
|
*
|
|
473
1048
|
* ```javascript
|
|
474
|
-
* import {
|
|
1049
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
475
1050
|
*
|
|
476
|
-
* const
|
|
477
|
-
* console.log(
|
|
1051
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
1052
|
+
* console.log(similarity); // ~0.8
|
|
478
1053
|
* ```
|
|
479
1054
|
* @param {string} a
|
|
480
1055
|
* @param {string} b
|
|
481
1056
|
* @returns {number}
|
|
482
1057
|
*/
|
|
483
|
-
export function
|
|
1058
|
+
export function normalizedLevenshtein(a, b) {
|
|
484
1059
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
485
1060
|
const len0 = WASM_VECTOR_LEN;
|
|
486
1061
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
487
1062
|
const len1 = WASM_VECTOR_LEN;
|
|
488
|
-
const ret = wasm.
|
|
489
|
-
return ret >>> 0;
|
|
490
|
-
}
|
|
491
|
-
|
|
492
|
-
function passArrayJsValueToWasm0(array, malloc) {
|
|
493
|
-
const ptr = malloc(array.length * 4, 4) >>> 0;
|
|
494
|
-
for (let i = 0; i < array.length; i++) {
|
|
495
|
-
const add = addToExternrefTable0(array[i]);
|
|
496
|
-
getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
|
|
497
|
-
}
|
|
498
|
-
WASM_VECTOR_LEN = array.length;
|
|
499
|
-
return ptr;
|
|
500
|
-
}
|
|
501
|
-
/**
|
|
502
|
-
* Find all matches above a threshold score.
|
|
503
|
-
*
|
|
504
|
-
* Returns an array of objects with index and score for all candidates above the threshold.
|
|
505
|
-
*
|
|
506
|
-
* # JavaScript Example
|
|
507
|
-
*
|
|
508
|
-
* ```javascript
|
|
509
|
-
* import { findMatchesAboveThreshold } from 'elid';
|
|
510
|
-
*
|
|
511
|
-
* const candidates = ["apple", "application", "apply", "banana"];
|
|
512
|
-
* const matches = findMatchesAboveThreshold("app", candidates, 0.5);
|
|
513
|
-
* console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
|
|
514
|
-
* ```
|
|
515
|
-
* @param {string} query
|
|
516
|
-
* @param {string[]} candidates
|
|
517
|
-
* @param {number} threshold
|
|
518
|
-
* @returns {any}
|
|
519
|
-
*/
|
|
520
|
-
export function findMatchesAboveThreshold(query, candidates, threshold) {
|
|
521
|
-
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
522
|
-
const len0 = WASM_VECTOR_LEN;
|
|
523
|
-
const ptr1 = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
|
|
524
|
-
const len1 = WASM_VECTOR_LEN;
|
|
525
|
-
const ret = wasm.findMatchesAboveThreshold(ptr0, len0, ptr1, len1, threshold);
|
|
1063
|
+
const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
|
|
526
1064
|
return ret;
|
|
527
1065
|
}
|
|
528
1066
|
|
|
529
|
-
let cachedFloat64ArrayMemory0 = null;
|
|
530
|
-
|
|
531
|
-
function getFloat64ArrayMemory0() {
|
|
532
|
-
if (cachedFloat64ArrayMemory0 === null || cachedFloat64ArrayMemory0.byteLength === 0) {
|
|
533
|
-
cachedFloat64ArrayMemory0 = new Float64Array(wasm.memory.buffer);
|
|
534
|
-
}
|
|
535
|
-
return cachedFloat64ArrayMemory0;
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
function passArrayF64ToWasm0(arg, malloc) {
|
|
539
|
-
const ptr = malloc(arg.length * 8, 8) >>> 0;
|
|
540
|
-
getFloat64ArrayMemory0().set(arg, ptr / 8);
|
|
541
|
-
WASM_VECTOR_LEN = arg.length;
|
|
542
|
-
return ptr;
|
|
543
|
-
}
|
|
544
|
-
|
|
545
1067
|
let cachedUint32ArrayMemory0 = null;
|
|
546
1068
|
|
|
547
1069
|
function getUint32ArrayMemory0() {
|
|
@@ -588,31 +1110,98 @@ export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
|
|
|
588
1110
|
}
|
|
589
1111
|
|
|
590
1112
|
/**
|
|
591
|
-
*
|
|
1113
|
+
* Dimension handling mode for full vector encoding.
|
|
592
1114
|
*
|
|
593
|
-
*
|
|
1115
|
+
* Controls whether to preserve original dimensions, reduce them,
|
|
1116
|
+
* or project to a common space for cross-dimensional comparison.
|
|
594
1117
|
*
|
|
595
1118
|
* # JavaScript Example
|
|
596
1119
|
*
|
|
597
1120
|
* ```javascript
|
|
598
|
-
* import {
|
|
1121
|
+
* import { ElidDimensionMode, encodeElidFullVector } from 'elid';
|
|
599
1122
|
*
|
|
600
|
-
*
|
|
601
|
-
*
|
|
602
|
-
*
|
|
1123
|
+
* // Preserve all dimensions
|
|
1124
|
+
* // Reduce to fewer dimensions for smaller output
|
|
1125
|
+
* // Common space for comparing different-sized embeddings
|
|
603
1126
|
* ```
|
|
604
|
-
* @
|
|
605
|
-
* @param {string[]} candidates
|
|
606
|
-
* @returns {object}
|
|
1127
|
+
* @enum {0 | 1 | 2}
|
|
607
1128
|
*/
|
|
608
|
-
export
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
1129
|
+
export const ElidDimensionMode = Object.freeze({
|
|
1130
|
+
/**
|
|
1131
|
+
* Preserve all original dimensions (no projection)
|
|
1132
|
+
*/
|
|
1133
|
+
Preserve: 0, "0": "Preserve",
|
|
1134
|
+
/**
|
|
1135
|
+
* Reduce dimensions using random projection
|
|
1136
|
+
*/
|
|
1137
|
+
Reduce: 1, "1": "Reduce",
|
|
1138
|
+
/**
|
|
1139
|
+
* Project to common space for cross-dimensional comparison
|
|
1140
|
+
*/
|
|
1141
|
+
Common: 2, "2": "Common",
|
|
1142
|
+
});
|
|
1143
|
+
/**
|
|
1144
|
+
* ELID encoding profile for vector embeddings.
|
|
1145
|
+
*
|
|
1146
|
+
* Choose a profile based on your use case:
|
|
1147
|
+
* - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
|
|
1148
|
+
* - `Morton10x10`: Z-order curve encoding, good for range queries
|
|
1149
|
+
* - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
|
|
1150
|
+
*
|
|
1151
|
+
* # JavaScript Example
|
|
1152
|
+
*
|
|
1153
|
+
* ```javascript
|
|
1154
|
+
* import { ElidProfile, encodeElid } from 'elid';
|
|
1155
|
+
*
|
|
1156
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1157
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
1158
|
+
* ```
|
|
1159
|
+
* @enum {0 | 1 | 2}
|
|
1160
|
+
*/
|
|
1161
|
+
export const ElidProfile = Object.freeze({
|
|
1162
|
+
/**
|
|
1163
|
+
* 128-bit SimHash (cosine similarity via Hamming distance)
|
|
1164
|
+
*/
|
|
1165
|
+
Mini128: 0, "0": "Mini128",
|
|
1166
|
+
/**
|
|
1167
|
+
* Morton/Z-order curve encoding (10 dims, 10 bits each)
|
|
1168
|
+
*/
|
|
1169
|
+
Morton10x10: 1, "1": "Morton10x10",
|
|
1170
|
+
/**
|
|
1171
|
+
* Hilbert curve encoding (10 dims, 10 bits each)
|
|
1172
|
+
*/
|
|
1173
|
+
Hilbert10x10: 2, "2": "Hilbert10x10",
|
|
1174
|
+
});
|
|
1175
|
+
/**
|
|
1176
|
+
* Precision options for full vector encoding.
|
|
1177
|
+
*
|
|
1178
|
+
* Controls how many bits are used to represent each dimension value.
|
|
1179
|
+
* Higher precision means more accurate reconstruction but larger output.
|
|
1180
|
+
*
|
|
1181
|
+
* # JavaScript Example
|
|
1182
|
+
*
|
|
1183
|
+
* ```javascript
|
|
1184
|
+
* import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
|
|
1185
|
+
*
|
|
1186
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1187
|
+
* // Full32 = lossless, Half16 = smaller with minimal error
|
|
1188
|
+
* ```
|
|
1189
|
+
* @enum {0 | 1 | 2}
|
|
1190
|
+
*/
|
|
1191
|
+
export const ElidVectorPrecision = Object.freeze({
|
|
1192
|
+
/**
|
|
1193
|
+
* Full 32-bit float (lossless, 4 bytes per dimension)
|
|
1194
|
+
*/
|
|
1195
|
+
Full32: 0, "0": "Full32",
|
|
1196
|
+
/**
|
|
1197
|
+
* 16-bit half-precision float (2 bytes per dimension)
|
|
1198
|
+
*/
|
|
1199
|
+
Half16: 1, "1": "Half16",
|
|
1200
|
+
/**
|
|
1201
|
+
* 8-bit quantized (1 byte per dimension, ~1% error)
|
|
1202
|
+
*/
|
|
1203
|
+
Quant8: 2, "2": "Quant8",
|
|
1204
|
+
});
|
|
616
1205
|
|
|
617
1206
|
const SimilarityOptionsFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
618
1207
|
? { register: () => {}, unregister: () => {} }
|
|
@@ -757,6 +1346,11 @@ export function __wbg_new_e17d9f43105b08be() {
|
|
|
757
1346
|
return ret;
|
|
758
1347
|
};
|
|
759
1348
|
|
|
1349
|
+
export function __wbg_new_with_length_cd045ed0a87d4dd6(arg0) {
|
|
1350
|
+
const ret = new Float64Array(arg0 >>> 0);
|
|
1351
|
+
return ret;
|
|
1352
|
+
};
|
|
1353
|
+
|
|
760
1354
|
export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
|
|
761
1355
|
arg0[arg1] = arg2;
|
|
762
1356
|
};
|
|
@@ -775,6 +1369,10 @@ export function __wbg_set_c2abbebe8b9ebee1() { return handleError(function (arg0
|
|
|
775
1369
|
return ret;
|
|
776
1370
|
}, arguments) };
|
|
777
1371
|
|
|
1372
|
+
export function __wbg_set_index_a0c01b257dd824f8(arg0, arg1, arg2) {
|
|
1373
|
+
arg0[arg1 >>> 0] = arg2;
|
|
1374
|
+
};
|
|
1375
|
+
|
|
778
1376
|
export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
|
|
779
1377
|
// Cast intrinsic for `Ref(String) -> Externref`.
|
|
780
1378
|
const ret = getStringFromWasm0(arg0, arg1);
|