elid 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/elid.d.ts +265 -265
- package/elid_bg.js +467 -467
- package/elid_bg.wasm +0 -0
- package/package.json +1 -1
package/elid_bg.js
CHANGED
|
@@ -181,55 +181,34 @@ function handleError(f, args) {
|
|
|
181
181
|
}
|
|
182
182
|
}
|
|
183
183
|
/**
|
|
184
|
-
* Compute the
|
|
184
|
+
* Compute the SimHash fingerprint of a string.
|
|
185
185
|
*
|
|
186
|
-
*
|
|
186
|
+
* Returns a 64-bit hash where similar strings produce similar numbers.
|
|
187
|
+
* Use this for database queries by storing the hash and querying by numeric range.
|
|
187
188
|
*
|
|
188
189
|
* # JavaScript Example
|
|
189
190
|
*
|
|
190
191
|
* ```javascript
|
|
191
|
-
* import {
|
|
192
|
-
*
|
|
193
|
-
* const distance = osaDistance("ca", "ac");
|
|
194
|
-
* console.log(distance); // 1 (transposition)
|
|
195
|
-
* ```
|
|
196
|
-
* @param {string} a
|
|
197
|
-
* @param {string} b
|
|
198
|
-
* @returns {number}
|
|
199
|
-
*/
|
|
200
|
-
export function osaDistance(a, b) {
|
|
201
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
202
|
-
const len0 = WASM_VECTOR_LEN;
|
|
203
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
204
|
-
const len1 = WASM_VECTOR_LEN;
|
|
205
|
-
const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
|
|
206
|
-
return ret >>> 0;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
/**
|
|
210
|
-
* Compute the Levenshtein distance between two strings.
|
|
211
|
-
*
|
|
212
|
-
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
192
|
+
* import { simhash } from 'elid';
|
|
213
193
|
*
|
|
214
|
-
*
|
|
194
|
+
* const hash1 = simhash("iPhone 14");
|
|
195
|
+
* const hash2 = simhash("iPhone 15");
|
|
196
|
+
* const hash3 = simhash("Galaxy S23");
|
|
215
197
|
*
|
|
216
|
-
*
|
|
217
|
-
*
|
|
198
|
+
* // hash1 and hash2 will be numerically close
|
|
199
|
+
* // hash3 will be numerically distant
|
|
218
200
|
*
|
|
219
|
-
*
|
|
220
|
-
*
|
|
201
|
+
* // Store in database as bigint:
|
|
202
|
+
* // { name: "iPhone 14", simhash: hash1 }
|
|
221
203
|
* ```
|
|
222
|
-
* @param {string}
|
|
223
|
-
* @param {string} b
|
|
204
|
+
* @param {string} text
|
|
224
205
|
* @returns {number}
|
|
225
206
|
*/
|
|
226
|
-
export function
|
|
227
|
-
const ptr0 = passStringToWasm0(
|
|
207
|
+
export function simhash(text) {
|
|
208
|
+
const ptr0 = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
228
209
|
const len0 = WASM_VECTOR_LEN;
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
|
|
232
|
-
return ret >>> 0;
|
|
210
|
+
const ret = wasm.simhash(ptr0, len0);
|
|
211
|
+
return ret;
|
|
233
212
|
}
|
|
234
213
|
|
|
235
214
|
function passArrayJsValueToWasm0(array, malloc) {
|
|
@@ -269,31 +248,6 @@ export function findMatchesAboveThreshold(query, candidates, threshold) {
|
|
|
269
248
|
return ret;
|
|
270
249
|
}
|
|
271
250
|
|
|
272
|
-
/**
|
|
273
|
-
* Compute the Hamming distance between two SimHash values.
|
|
274
|
-
*
|
|
275
|
-
* Returns the number of differing bits. Lower values = higher similarity.
|
|
276
|
-
*
|
|
277
|
-
* # JavaScript Example
|
|
278
|
-
*
|
|
279
|
-
* ```javascript
|
|
280
|
-
* import { simhash, simhashDistance } from 'elid';
|
|
281
|
-
*
|
|
282
|
-
* const hash1 = simhash("iPhone 14");
|
|
283
|
-
* const hash2 = simhash("iPhone 15");
|
|
284
|
-
* const distance = simhashDistance(hash1, hash2);
|
|
285
|
-
*
|
|
286
|
-
* console.log(distance); // Low number = similar
|
|
287
|
-
* ```
|
|
288
|
-
* @param {number} hash1
|
|
289
|
-
* @param {number} hash2
|
|
290
|
-
* @returns {number}
|
|
291
|
-
*/
|
|
292
|
-
export function simhashDistance(hash1, hash2) {
|
|
293
|
-
const ret = wasm.simhashDistance(hash1, hash2);
|
|
294
|
-
return ret >>> 0;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
251
|
let cachedFloat64ArrayMemory0 = null;
|
|
298
252
|
|
|
299
253
|
function getFloat64ArrayMemory0() {
|
|
@@ -316,45 +270,49 @@ function takeFromExternrefTable0(idx) {
|
|
|
316
270
|
return value;
|
|
317
271
|
}
|
|
318
272
|
/**
|
|
319
|
-
* Encode an embedding
|
|
273
|
+
* Encode an embedding for cross-dimensional comparison.
|
|
320
274
|
*
|
|
321
|
-
*
|
|
322
|
-
*
|
|
275
|
+
* Projects the embedding to a common dimension space, allowing comparison
|
|
276
|
+
* between embeddings of different original dimensions (e.g., 256d vs 768d).
|
|
323
277
|
*
|
|
324
278
|
* # Parameters
|
|
325
279
|
*
|
|
326
280
|
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
327
|
-
* - `
|
|
281
|
+
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
328
282
|
*
|
|
329
283
|
* # Returns
|
|
330
284
|
*
|
|
331
|
-
* A base32hex-encoded ELID string
|
|
285
|
+
* A base32hex-encoded ELID string.
|
|
332
286
|
*
|
|
333
287
|
* # JavaScript Example
|
|
334
288
|
*
|
|
335
289
|
* ```javascript
|
|
336
|
-
* import {
|
|
290
|
+
* import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
|
|
337
291
|
*
|
|
338
|
-
*
|
|
292
|
+
* // Different sized embeddings from different models
|
|
293
|
+
* const embedding256 = new Float64Array(256).fill(0.1);
|
|
294
|
+
* const embedding768 = new Float64Array(768).fill(0.1);
|
|
339
295
|
*
|
|
340
|
-
* //
|
|
341
|
-
* const
|
|
342
|
-
*
|
|
296
|
+
* // Project both to 128-dim common space
|
|
297
|
+
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
298
|
+
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
343
299
|
*
|
|
344
|
-
* //
|
|
345
|
-
* const
|
|
300
|
+
* // Now they can be compared directly (both decode to 128 dims)
|
|
301
|
+
* const dec1 = decodeElidToEmbedding(elid1);
|
|
302
|
+
* const dec2 = decodeElidToEmbedding(elid2);
|
|
303
|
+
* // Both have length 128
|
|
346
304
|
* ```
|
|
347
305
|
* @param {Float64Array} embedding
|
|
348
|
-
* @param {number}
|
|
306
|
+
* @param {number} common_dims
|
|
349
307
|
* @returns {string}
|
|
350
308
|
*/
|
|
351
|
-
export function
|
|
309
|
+
export function encodeElidCrossDimensional(embedding, common_dims) {
|
|
352
310
|
let deferred3_0;
|
|
353
311
|
let deferred3_1;
|
|
354
312
|
try {
|
|
355
313
|
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
356
314
|
const len0 = WASM_VECTOR_LEN;
|
|
357
|
-
const ret = wasm.
|
|
315
|
+
const ret = wasm.encodeElidCrossDimensional(ptr0, len0, common_dims);
|
|
358
316
|
var ptr2 = ret[0];
|
|
359
317
|
var len2 = ret[1];
|
|
360
318
|
if (ret[3]) {
|
|
@@ -369,154 +327,303 @@ export function encodeElidMaxLength(embedding, max_chars) {
|
|
|
369
327
|
}
|
|
370
328
|
}
|
|
371
329
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
330
|
+
let cachedUint32ArrayMemory0 = null;
|
|
331
|
+
|
|
332
|
+
function getUint32ArrayMemory0() {
|
|
333
|
+
if (cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0) {
|
|
334
|
+
cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
|
|
375
335
|
}
|
|
336
|
+
return cachedUint32ArrayMemory0;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
function getArrayU32FromWasm0(ptr, len) {
|
|
340
|
+
ptr = ptr >>> 0;
|
|
341
|
+
return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
|
|
376
342
|
}
|
|
377
343
|
/**
|
|
378
|
-
*
|
|
344
|
+
* Find all hashes within a given distance threshold.
|
|
345
|
+
*
|
|
346
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
379
347
|
*
|
|
380
348
|
* # JavaScript Example
|
|
381
349
|
*
|
|
382
350
|
* ```javascript
|
|
383
|
-
* import {
|
|
351
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
384
352
|
*
|
|
385
|
-
* const
|
|
386
|
-
*
|
|
387
|
-
* opts.setTrimWhitespace(true);
|
|
353
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
354
|
+
* const hashes = candidates.map(s => simhash(s));
|
|
388
355
|
*
|
|
389
|
-
* const
|
|
390
|
-
*
|
|
356
|
+
* const queryHash = simhash("iPhone 14");
|
|
357
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
358
|
+
*
|
|
359
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
391
360
|
* ```
|
|
392
|
-
* @param {
|
|
393
|
-
* @param {
|
|
394
|
-
* @param {
|
|
395
|
-
* @returns {
|
|
361
|
+
* @param {number} query_hash
|
|
362
|
+
* @param {Float64Array} candidate_hashes
|
|
363
|
+
* @param {number} max_distance
|
|
364
|
+
* @returns {Uint32Array}
|
|
396
365
|
*/
|
|
397
|
-
export function
|
|
398
|
-
const ptr0 =
|
|
366
|
+
export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
|
|
367
|
+
const ptr0 = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
|
|
399
368
|
const len0 = WASM_VECTOR_LEN;
|
|
400
|
-
const
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
|
|
405
|
-
return ret >>> 0;
|
|
369
|
+
const ret = wasm.findSimilarHashes(query_hash, ptr0, len0, max_distance);
|
|
370
|
+
var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
|
|
371
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
372
|
+
return v2;
|
|
406
373
|
}
|
|
407
374
|
|
|
408
375
|
/**
|
|
409
|
-
*
|
|
410
|
-
*
|
|
411
|
-
* Returns true if the ELID was encoded with a FullVector profile
|
|
412
|
-
* (lossless, compressed, or max_length), false otherwise.
|
|
413
|
-
*
|
|
414
|
-
* # Parameters
|
|
415
|
-
*
|
|
416
|
-
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
417
|
-
*
|
|
418
|
-
* # Returns
|
|
376
|
+
* Compute the Jaro similarity between two strings.
|
|
419
377
|
*
|
|
420
|
-
*
|
|
378
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
379
|
+
* Particularly effective for short strings like names.
|
|
421
380
|
*
|
|
422
381
|
* # JavaScript Example
|
|
423
382
|
*
|
|
424
383
|
* ```javascript
|
|
425
|
-
* import {
|
|
426
|
-
*
|
|
427
|
-
* const embedding = new Float64Array(768).fill(0.1);
|
|
428
|
-
*
|
|
429
|
-
* // Mini128 is NOT reversible
|
|
430
|
-
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
431
|
-
* console.log(isElidReversible(mini128Elid)); // false
|
|
384
|
+
* import { jaro } from 'elid';
|
|
432
385
|
*
|
|
433
|
-
*
|
|
434
|
-
*
|
|
435
|
-
* console.log(isElidReversible(losslessElid)); // true
|
|
386
|
+
* const similarity = jaro("martha", "marhta");
|
|
387
|
+
* console.log(similarity); // ~0.944
|
|
436
388
|
* ```
|
|
437
|
-
* @param {string}
|
|
438
|
-
* @
|
|
389
|
+
* @param {string} a
|
|
390
|
+
* @param {string} b
|
|
391
|
+
* @returns {number}
|
|
439
392
|
*/
|
|
440
|
-
export function
|
|
441
|
-
const ptr0 = passStringToWasm0(
|
|
393
|
+
export function jaro(a, b) {
|
|
394
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
442
395
|
const len0 = WASM_VECTOR_LEN;
|
|
443
|
-
const
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
return ret[0] !== 0;
|
|
396
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
397
|
+
const len1 = WASM_VECTOR_LEN;
|
|
398
|
+
const ret = wasm.jaro(ptr0, len0, ptr1, len1);
|
|
399
|
+
return ret;
|
|
448
400
|
}
|
|
449
401
|
|
|
450
402
|
/**
|
|
451
|
-
* Compute the
|
|
403
|
+
* Compute the best matching similarity between two strings.
|
|
452
404
|
*
|
|
453
|
-
*
|
|
454
|
-
* Use this for database queries by storing the hash and querying by numeric range.
|
|
405
|
+
* Runs multiple algorithms and returns the highest score.
|
|
455
406
|
*
|
|
456
407
|
* # JavaScript Example
|
|
457
408
|
*
|
|
458
409
|
* ```javascript
|
|
459
|
-
* import {
|
|
460
|
-
*
|
|
461
|
-
* const hash1 = simhash("iPhone 14");
|
|
462
|
-
* const hash2 = simhash("iPhone 15");
|
|
463
|
-
* const hash3 = simhash("Galaxy S23");
|
|
464
|
-
*
|
|
465
|
-
* // hash1 and hash2 will be numerically close
|
|
466
|
-
* // hash3 will be numerically distant
|
|
410
|
+
* import { bestMatch } from 'elid';
|
|
467
411
|
*
|
|
468
|
-
*
|
|
469
|
-
* //
|
|
412
|
+
* const score = bestMatch("hello", "hallo");
|
|
413
|
+
* console.log(score); // ~0.8
|
|
470
414
|
* ```
|
|
471
|
-
* @param {string}
|
|
415
|
+
* @param {string} a
|
|
416
|
+
* @param {string} b
|
|
472
417
|
* @returns {number}
|
|
473
418
|
*/
|
|
474
|
-
export function
|
|
475
|
-
const ptr0 = passStringToWasm0(
|
|
419
|
+
export function bestMatch(a, b) {
|
|
420
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
476
421
|
const len0 = WASM_VECTOR_LEN;
|
|
477
|
-
const
|
|
422
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
423
|
+
const len1 = WASM_VECTOR_LEN;
|
|
424
|
+
const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
|
|
478
425
|
return ret;
|
|
479
426
|
}
|
|
480
427
|
|
|
481
|
-
function getArrayU8FromWasm0(ptr, len) {
|
|
482
|
-
ptr = ptr >>> 0;
|
|
483
|
-
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
484
|
-
}
|
|
485
428
|
/**
|
|
486
|
-
*
|
|
429
|
+
* Encode an embedding vector to an ELID string.
|
|
487
430
|
*
|
|
488
|
-
*
|
|
489
|
-
*
|
|
431
|
+
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
432
|
+
* sortable identifier. The ELID preserves locality properties for efficient
|
|
433
|
+
* similarity search.
|
|
490
434
|
*
|
|
491
435
|
* # Parameters
|
|
492
436
|
*
|
|
493
|
-
* - `
|
|
437
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
438
|
+
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
494
439
|
*
|
|
495
440
|
* # Returns
|
|
496
441
|
*
|
|
497
|
-
* A
|
|
442
|
+
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
498
443
|
*
|
|
499
444
|
* # JavaScript Example
|
|
500
445
|
*
|
|
501
446
|
* ```javascript
|
|
502
|
-
* import {
|
|
447
|
+
* import { encodeElid, ElidProfile } from 'elid';
|
|
503
448
|
*
|
|
504
|
-
*
|
|
505
|
-
*
|
|
449
|
+
* // OpenAI embeddings are 1536 dimensions
|
|
450
|
+
* const embedding = await getEmbedding("Hello world");
|
|
451
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
452
|
+
* console.log(elid); // "012345abcdef..."
|
|
453
|
+
* ```
|
|
454
|
+
* @param {Float64Array} embedding
|
|
455
|
+
* @param {ElidProfile} profile
|
|
456
|
+
* @returns {string}
|
|
457
|
+
*/
|
|
458
|
+
export function encodeElid(embedding, profile) {
|
|
459
|
+
let deferred3_0;
|
|
460
|
+
let deferred3_1;
|
|
461
|
+
try {
|
|
462
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
463
|
+
const len0 = WASM_VECTOR_LEN;
|
|
464
|
+
const ret = wasm.encodeElid(ptr0, len0, profile);
|
|
465
|
+
var ptr2 = ret[0];
|
|
466
|
+
var len2 = ret[1];
|
|
467
|
+
if (ret[3]) {
|
|
468
|
+
ptr2 = 0; len2 = 0;
|
|
469
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
470
|
+
}
|
|
471
|
+
deferred3_0 = ptr2;
|
|
472
|
+
deferred3_1 = len2;
|
|
473
|
+
return getStringFromWasm0(ptr2, len2);
|
|
474
|
+
} finally {
|
|
475
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
481
|
+
*
|
|
482
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
483
|
+
*
|
|
484
|
+
* # JavaScript Example
|
|
485
|
+
*
|
|
486
|
+
* ```javascript
|
|
487
|
+
* import { simhashSimilarity } from 'elid';
|
|
488
|
+
*
|
|
489
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
490
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
491
|
+
*
|
|
492
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
493
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
494
|
+
* ```
|
|
495
|
+
* @param {string} a
|
|
496
|
+
* @param {string} b
|
|
497
|
+
* @returns {number}
|
|
498
|
+
*/
|
|
499
|
+
export function simhashSimilarity(a, b) {
|
|
500
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
501
|
+
const len0 = WASM_VECTOR_LEN;
|
|
502
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
503
|
+
const len1 = WASM_VECTOR_LEN;
|
|
504
|
+
const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
|
|
505
|
+
return ret;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Encode an embedding using lossless full vector encoding.
|
|
510
|
+
*
|
|
511
|
+
* Preserves the exact embedding values (32-bit float precision) and all dimensions.
|
|
512
|
+
* This produces the largest output but allows exact reconstruction.
|
|
513
|
+
*
|
|
514
|
+
* # Parameters
|
|
515
|
+
*
|
|
516
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
517
|
+
*
|
|
518
|
+
* # Returns
|
|
519
|
+
*
|
|
520
|
+
* A base32hex-encoded ELID string that can be decoded back to the original embedding.
|
|
521
|
+
*
|
|
522
|
+
* # JavaScript Example
|
|
523
|
+
*
|
|
524
|
+
* ```javascript
|
|
525
|
+
* import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
|
|
526
|
+
*
|
|
527
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
528
|
+
* const elid = encodeElidLossless(embedding);
|
|
529
|
+
*
|
|
530
|
+
* // Later, recover the exact embedding
|
|
531
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
532
|
+
* // recovered is identical to embedding
|
|
533
|
+
* ```
|
|
534
|
+
* @param {Float64Array} embedding
|
|
535
|
+
* @returns {string}
|
|
536
|
+
*/
|
|
537
|
+
export function encodeElidLossless(embedding) {
|
|
538
|
+
let deferred3_0;
|
|
539
|
+
let deferred3_1;
|
|
540
|
+
try {
|
|
541
|
+
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
542
|
+
const len0 = WASM_VECTOR_LEN;
|
|
543
|
+
const ret = wasm.encodeElidLossless(ptr0, len0);
|
|
544
|
+
var ptr2 = ret[0];
|
|
545
|
+
var len2 = ret[1];
|
|
546
|
+
if (ret[3]) {
|
|
547
|
+
ptr2 = 0; len2 = 0;
|
|
548
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
549
|
+
}
|
|
550
|
+
deferred3_0 = ptr2;
|
|
551
|
+
deferred3_1 = len2;
|
|
552
|
+
return getStringFromWasm0(ptr2, len2);
|
|
553
|
+
} finally {
|
|
554
|
+
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
/**
|
|
559
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
560
|
+
*
|
|
561
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
562
|
+
*
|
|
563
|
+
* # JavaScript Example
|
|
564
|
+
*
|
|
565
|
+
* ```javascript
|
|
566
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
567
|
+
*
|
|
568
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
569
|
+
* console.log(similarity); // ~0.8
|
|
570
|
+
* ```
|
|
571
|
+
* @param {string} a
|
|
572
|
+
* @param {string} b
|
|
573
|
+
* @returns {number}
|
|
574
|
+
*/
|
|
575
|
+
export function normalizedLevenshtein(a, b) {
|
|
576
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
577
|
+
const len0 = WASM_VECTOR_LEN;
|
|
578
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
579
|
+
const len1 = WASM_VECTOR_LEN;
|
|
580
|
+
const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
|
|
581
|
+
return ret;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
/**
|
|
585
|
+
* Decode an ELID string back to an embedding vector.
|
|
586
|
+
*
|
|
587
|
+
* Only works for ELIDs encoded with a FullVector profile (lossless,
|
|
588
|
+
* compressed, or max_length). Returns null for non-reversible profiles
|
|
589
|
+
* like Mini128, Morton, or Hilbert.
|
|
590
|
+
*
|
|
591
|
+
* # Parameters
|
|
592
|
+
*
|
|
593
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
594
|
+
*
|
|
595
|
+
* # Returns
|
|
596
|
+
*
|
|
597
|
+
* A Float64Array containing the decoded embedding, or null if the ELID
|
|
598
|
+
* is not reversible.
|
|
599
|
+
*
|
|
600
|
+
* Note: If dimension reduction was used during encoding, the decoded
|
|
601
|
+
* embedding will be in the reduced dimension space, not the original.
|
|
602
|
+
*
|
|
603
|
+
* # JavaScript Example
|
|
604
|
+
*
|
|
605
|
+
* ```javascript
|
|
606
|
+
* import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
|
|
607
|
+
*
|
|
608
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
609
|
+
* const elid = encodeElidLossless(embedding);
|
|
610
|
+
*
|
|
611
|
+
* if (isElidReversible(elid)) {
|
|
612
|
+
* const recovered = decodeElidToEmbedding(elid);
|
|
613
|
+
* console.log(recovered.length); // 768
|
|
614
|
+
* }
|
|
506
615
|
* ```
|
|
507
616
|
* @param {string} elid_str
|
|
508
|
-
* @returns {
|
|
617
|
+
* @returns {any}
|
|
509
618
|
*/
|
|
510
|
-
export function
|
|
619
|
+
export function decodeElidToEmbedding(elid_str) {
|
|
511
620
|
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
512
621
|
const len0 = WASM_VECTOR_LEN;
|
|
513
|
-
const ret = wasm.
|
|
514
|
-
if (ret[
|
|
515
|
-
throw takeFromExternrefTable0(ret[
|
|
622
|
+
const ret = wasm.decodeElidToEmbedding(ptr0, len0);
|
|
623
|
+
if (ret[2]) {
|
|
624
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
516
625
|
}
|
|
517
|
-
|
|
518
|
-
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
519
|
-
return v2;
|
|
626
|
+
return takeFromExternrefTable0(ret[0]);
|
|
520
627
|
}
|
|
521
628
|
|
|
522
629
|
/**
|
|
@@ -561,62 +668,149 @@ export function getElidMetadata(elid_str) {
|
|
|
561
668
|
return takeFromExternrefTable0(ret[0]);
|
|
562
669
|
}
|
|
563
670
|
|
|
671
|
+
function getArrayU8FromWasm0(ptr, len) {
|
|
672
|
+
ptr = ptr >>> 0;
|
|
673
|
+
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
674
|
+
}
|
|
564
675
|
/**
|
|
565
|
-
*
|
|
676
|
+
* Decode an ELID string to raw bytes.
|
|
566
677
|
*
|
|
567
|
-
*
|
|
568
|
-
*
|
|
678
|
+
* Returns the raw byte representation of an ELID, including the header
|
|
679
|
+
* and payload bytes. Useful for custom processing or debugging.
|
|
569
680
|
*
|
|
570
681
|
* # Parameters
|
|
571
682
|
*
|
|
572
|
-
* - `
|
|
573
|
-
* - `common_dims`: Target dimension space (all vectors projected here)
|
|
683
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
574
684
|
*
|
|
575
685
|
* # Returns
|
|
576
686
|
*
|
|
577
|
-
* A
|
|
687
|
+
* A Uint8Array containing the raw bytes (header + payload).
|
|
578
688
|
*
|
|
579
689
|
* # JavaScript Example
|
|
580
690
|
*
|
|
581
691
|
* ```javascript
|
|
582
|
-
* import {
|
|
692
|
+
* import { decodeElid } from 'elid';
|
|
583
693
|
*
|
|
584
|
-
*
|
|
585
|
-
*
|
|
586
|
-
*
|
|
694
|
+
* const bytes = decodeElid("012345abcdef...");
|
|
695
|
+
* console.log(bytes); // Uint8Array [...]
|
|
696
|
+
* ```
|
|
697
|
+
* @param {string} elid_str
|
|
698
|
+
* @returns {Uint8Array}
|
|
699
|
+
*/
|
|
700
|
+
export function decodeElid(elid_str) {
|
|
701
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
702
|
+
const len0 = WASM_VECTOR_LEN;
|
|
703
|
+
const ret = wasm.decodeElid(ptr0, len0);
|
|
704
|
+
if (ret[3]) {
|
|
705
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
706
|
+
}
|
|
707
|
+
var v2 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
|
|
708
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
709
|
+
return v2;
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/**
|
|
713
|
+
* Compute the Levenshtein distance between two strings.
|
|
587
714
|
*
|
|
588
|
-
*
|
|
589
|
-
* const elid1 = encodeElidCrossDimensional(embedding256, 128);
|
|
590
|
-
* const elid2 = encodeElidCrossDimensional(embedding768, 128);
|
|
715
|
+
* Returns the minimum number of single-character edits needed to transform one string into another.
|
|
591
716
|
*
|
|
592
|
-
*
|
|
593
|
-
*
|
|
594
|
-
*
|
|
595
|
-
*
|
|
717
|
+
* # JavaScript Example
|
|
718
|
+
*
|
|
719
|
+
* ```javascript
|
|
720
|
+
* import { levenshtein } from 'elid';
|
|
721
|
+
*
|
|
722
|
+
* const distance = levenshtein("kitten", "sitting");
|
|
723
|
+
* console.log(distance); // 3
|
|
596
724
|
* ```
|
|
597
|
-
* @param {
|
|
598
|
-
* @param {
|
|
599
|
-
* @returns {
|
|
725
|
+
* @param {string} a
|
|
726
|
+
* @param {string} b
|
|
727
|
+
* @returns {number}
|
|
600
728
|
*/
|
|
601
|
-
export function
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
729
|
+
export function levenshtein(a, b) {
|
|
730
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
731
|
+
const len0 = WASM_VECTOR_LEN;
|
|
732
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
733
|
+
const len1 = WASM_VECTOR_LEN;
|
|
734
|
+
const ret = wasm.levenshtein(ptr0, len0, ptr1, len1);
|
|
735
|
+
return ret >>> 0;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/**
|
|
739
|
+
* Check if an ELID can be decoded back to an embedding.
|
|
740
|
+
*
|
|
741
|
+
* Returns true if the ELID was encoded with a FullVector profile
|
|
742
|
+
* (lossless, compressed, or max_length), false otherwise.
|
|
743
|
+
*
|
|
744
|
+
* # Parameters
|
|
745
|
+
*
|
|
746
|
+
* - `elid_str`: A valid ELID string (base32hex encoded)
|
|
747
|
+
*
|
|
748
|
+
* # Returns
|
|
749
|
+
*
|
|
750
|
+
* `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
|
|
751
|
+
*
|
|
752
|
+
* # JavaScript Example
|
|
753
|
+
*
|
|
754
|
+
* ```javascript
|
|
755
|
+
* import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
|
|
756
|
+
*
|
|
757
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
758
|
+
*
|
|
759
|
+
* // Mini128 is NOT reversible
|
|
760
|
+
* const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
761
|
+
* console.log(isElidReversible(mini128Elid)); // false
|
|
762
|
+
*
|
|
763
|
+
* // Lossless IS reversible
|
|
764
|
+
* const losslessElid = encodeElidLossless(embedding);
|
|
765
|
+
* console.log(isElidReversible(losslessElid)); // true
|
|
766
|
+
* ```
|
|
767
|
+
* @param {string} elid_str
|
|
768
|
+
* @returns {boolean}
|
|
769
|
+
*/
|
|
770
|
+
export function isElidReversible(elid_str) {
|
|
771
|
+
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
772
|
+
const len0 = WASM_VECTOR_LEN;
|
|
773
|
+
const ret = wasm.isElidReversible(ptr0, len0);
|
|
774
|
+
if (ret[2]) {
|
|
775
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
619
776
|
}
|
|
777
|
+
return ret[0] !== 0;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
function _assertClass(instance, klass) {
|
|
781
|
+
if (!(instance instanceof klass)) {
|
|
782
|
+
throw new Error(`expected instance of ${klass.name}`);
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Compute Levenshtein distance with custom options.
|
|
787
|
+
*
|
|
788
|
+
* # JavaScript Example
|
|
789
|
+
*
|
|
790
|
+
* ```javascript
|
|
791
|
+
* import { levenshteinWithOpts, SimilarityOptions } from 'elid';
|
|
792
|
+
*
|
|
793
|
+
* const opts = new SimilarityOptions();
|
|
794
|
+
* opts.setCaseSensitive(false);
|
|
795
|
+
* opts.setTrimWhitespace(true);
|
|
796
|
+
*
|
|
797
|
+
* const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
|
|
798
|
+
* console.log(distance); // 0
|
|
799
|
+
* ```
|
|
800
|
+
* @param {string} a
|
|
801
|
+
* @param {string} b
|
|
802
|
+
* @param {SimilarityOptions} opts
|
|
803
|
+
* @returns {number}
|
|
804
|
+
*/
|
|
805
|
+
export function levenshteinWithOpts(a, b, opts) {
|
|
806
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
807
|
+
const len0 = WASM_VECTOR_LEN;
|
|
808
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
809
|
+
const len1 = WASM_VECTOR_LEN;
|
|
810
|
+
_assertClass(opts, SimilarityOptions);
|
|
811
|
+
var ptr2 = opts.__destroy_into_raw();
|
|
812
|
+
const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
|
|
813
|
+
return ret >>> 0;
|
|
620
814
|
}
|
|
621
815
|
|
|
622
816
|
/**
|
|
@@ -696,126 +890,100 @@ export function findBestMatch(query, candidates) {
|
|
|
696
890
|
}
|
|
697
891
|
|
|
698
892
|
/**
|
|
699
|
-
*
|
|
700
|
-
*
|
|
701
|
-
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
702
|
-
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
703
|
-
* - 0.5 = half precision and/or half dimensions
|
|
704
|
-
* - 0.25 = quarter precision and/or quarter dimensions
|
|
705
|
-
*
|
|
706
|
-
* The algorithm optimizes for dimension reduction first (which preserves
|
|
707
|
-
* more geometric relationships) before reducing precision.
|
|
708
|
-
*
|
|
709
|
-
* # Parameters
|
|
710
|
-
*
|
|
711
|
-
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
712
|
-
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
713
|
-
*
|
|
714
|
-
* # Returns
|
|
893
|
+
* Compute the OSA (Optimal String Alignment) distance between two strings.
|
|
715
894
|
*
|
|
716
|
-
*
|
|
895
|
+
* Similar to Levenshtein but also considers transpositions as a single operation.
|
|
717
896
|
*
|
|
718
897
|
* # JavaScript Example
|
|
719
898
|
*
|
|
720
899
|
* ```javascript
|
|
721
|
-
* import {
|
|
722
|
-
*
|
|
723
|
-
* const embedding = new Float64Array(768).fill(0.1);
|
|
724
|
-
*
|
|
725
|
-
* // 50% retention - good balance of size and fidelity
|
|
726
|
-
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
900
|
+
* import { osaDistance } from 'elid';
|
|
727
901
|
*
|
|
728
|
-
*
|
|
729
|
-
*
|
|
902
|
+
* const distance = osaDistance("ca", "ac");
|
|
903
|
+
* console.log(distance); // 1 (transposition)
|
|
730
904
|
* ```
|
|
731
|
-
* @param {
|
|
732
|
-
* @param {
|
|
733
|
-
* @returns {
|
|
905
|
+
* @param {string} a
|
|
906
|
+
* @param {string} b
|
|
907
|
+
* @returns {number}
|
|
734
908
|
*/
|
|
735
|
-
export function
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
var ptr2 = ret[0];
|
|
743
|
-
var len2 = ret[1];
|
|
744
|
-
if (ret[3]) {
|
|
745
|
-
ptr2 = 0; len2 = 0;
|
|
746
|
-
throw takeFromExternrefTable0(ret[2]);
|
|
747
|
-
}
|
|
748
|
-
deferred3_0 = ptr2;
|
|
749
|
-
deferred3_1 = len2;
|
|
750
|
-
return getStringFromWasm0(ptr2, len2);
|
|
751
|
-
} finally {
|
|
752
|
-
wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
|
|
753
|
-
}
|
|
909
|
+
export function osaDistance(a, b) {
|
|
910
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
911
|
+
const len0 = WASM_VECTOR_LEN;
|
|
912
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
913
|
+
const len1 = WASM_VECTOR_LEN;
|
|
914
|
+
const ret = wasm.osaDistance(ptr0, len0, ptr1, len1);
|
|
915
|
+
return ret >>> 0;
|
|
754
916
|
}
|
|
755
917
|
|
|
756
918
|
/**
|
|
757
|
-
* Compute the
|
|
919
|
+
* Compute the Hamming distance between two SimHash values.
|
|
758
920
|
*
|
|
759
|
-
* Returns
|
|
760
|
-
* Particularly effective for short strings like names.
|
|
921
|
+
* Returns the number of differing bits. Lower values = higher similarity.
|
|
761
922
|
*
|
|
762
923
|
* # JavaScript Example
|
|
763
924
|
*
|
|
764
925
|
* ```javascript
|
|
765
|
-
* import {
|
|
926
|
+
* import { simhash, simhashDistance } from 'elid';
|
|
766
927
|
*
|
|
767
|
-
* const
|
|
768
|
-
*
|
|
928
|
+
* const hash1 = simhash("iPhone 14");
|
|
929
|
+
* const hash2 = simhash("iPhone 15");
|
|
930
|
+
* const distance = simhashDistance(hash1, hash2);
|
|
931
|
+
*
|
|
932
|
+
* console.log(distance); // Low number = similar
|
|
769
933
|
* ```
|
|
770
|
-
* @param {
|
|
771
|
-
* @param {
|
|
934
|
+
* @param {number} hash1
|
|
935
|
+
* @param {number} hash2
|
|
772
936
|
* @returns {number}
|
|
773
937
|
*/
|
|
774
|
-
export function
|
|
775
|
-
const
|
|
776
|
-
|
|
777
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
778
|
-
const len1 = WASM_VECTOR_LEN;
|
|
779
|
-
const ret = wasm.jaro(ptr0, len0, ptr1, len1);
|
|
780
|
-
return ret;
|
|
938
|
+
export function simhashDistance(hash1, hash2) {
|
|
939
|
+
const ret = wasm.simhashDistance(hash1, hash2);
|
|
940
|
+
return ret >>> 0;
|
|
781
941
|
}
|
|
782
942
|
|
|
783
943
|
/**
|
|
784
|
-
* Encode an embedding
|
|
944
|
+
* Encode an embedding with percentage-based compression.
|
|
785
945
|
*
|
|
786
|
-
*
|
|
787
|
-
*
|
|
946
|
+
* The retention percentage (0.0-1.0) controls how much information is preserved:
|
|
947
|
+
* - 1.0 = lossless (Full32 precision, all dimensions)
|
|
948
|
+
* - 0.5 = half precision and/or half dimensions
|
|
949
|
+
* - 0.25 = quarter precision and/or quarter dimensions
|
|
950
|
+
*
|
|
951
|
+
* The algorithm optimizes for dimension reduction first (which preserves
|
|
952
|
+
* more geometric relationships) before reducing precision.
|
|
788
953
|
*
|
|
789
954
|
* # Parameters
|
|
790
955
|
*
|
|
791
956
|
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
957
|
+
* - `retention_pct`: Information retention percentage (0.0-1.0)
|
|
792
958
|
*
|
|
793
959
|
* # Returns
|
|
794
960
|
*
|
|
795
|
-
* A base32hex-encoded ELID string
|
|
961
|
+
* A base32hex-encoded ELID string.
|
|
796
962
|
*
|
|
797
963
|
* # JavaScript Example
|
|
798
964
|
*
|
|
799
965
|
* ```javascript
|
|
800
|
-
* import {
|
|
966
|
+
* import { encodeElidCompressed } from 'elid';
|
|
801
967
|
*
|
|
802
968
|
* const embedding = new Float64Array(768).fill(0.1);
|
|
803
|
-
* const elid = encodeElidLossless(embedding);
|
|
804
969
|
*
|
|
805
|
-
* //
|
|
806
|
-
* const
|
|
807
|
-
*
|
|
970
|
+
* // 50% retention - good balance of size and fidelity
|
|
971
|
+
* const elid = encodeElidCompressed(embedding, 0.5);
|
|
972
|
+
*
|
|
973
|
+
* // 25% retention - smaller but less accurate
|
|
974
|
+
* const smallElid = encodeElidCompressed(embedding, 0.25);
|
|
808
975
|
* ```
|
|
809
976
|
* @param {Float64Array} embedding
|
|
977
|
+
* @param {number} retention_pct
|
|
810
978
|
* @returns {string}
|
|
811
979
|
*/
|
|
812
|
-
export function
|
|
980
|
+
export function encodeElidCompressed(embedding, retention_pct) {
|
|
813
981
|
let deferred3_0;
|
|
814
982
|
let deferred3_1;
|
|
815
983
|
try {
|
|
816
984
|
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
817
985
|
const len0 = WASM_VECTOR_LEN;
|
|
818
|
-
const ret = wasm.
|
|
986
|
+
const ret = wasm.encodeElidCompressed(ptr0, len0, retention_pct);
|
|
819
987
|
var ptr2 = ret[0];
|
|
820
988
|
var len2 = ret[1];
|
|
821
989
|
if (ret[3]) {
|
|
@@ -831,87 +999,45 @@ export function encodeElidLossless(embedding) {
|
|
|
831
999
|
}
|
|
832
1000
|
|
|
833
1001
|
/**
|
|
834
|
-
*
|
|
1002
|
+
* Encode an embedding with a maximum output string length constraint.
|
|
835
1003
|
*
|
|
836
|
-
*
|
|
837
|
-
*
|
|
838
|
-
* like Mini128, Morton, or Hilbert.
|
|
1004
|
+
* Calculates the optimal precision and dimension settings to fit within
|
|
1005
|
+
* the specified character limit while maximizing fidelity.
|
|
839
1006
|
*
|
|
840
1007
|
* # Parameters
|
|
841
1008
|
*
|
|
842
|
-
* - `
|
|
1009
|
+
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
1010
|
+
* - `max_chars`: Maximum output string length in characters
|
|
843
1011
|
*
|
|
844
1012
|
* # Returns
|
|
845
1013
|
*
|
|
846
|
-
* A
|
|
847
|
-
* is not reversible.
|
|
848
|
-
*
|
|
849
|
-
* Note: If dimension reduction was used during encoding, the decoded
|
|
850
|
-
* embedding will be in the reduced dimension space, not the original.
|
|
1014
|
+
* A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
|
|
851
1015
|
*
|
|
852
1016
|
* # JavaScript Example
|
|
853
1017
|
*
|
|
854
1018
|
* ```javascript
|
|
855
|
-
* import {
|
|
1019
|
+
* import { encodeElidMaxLength } from 'elid';
|
|
856
1020
|
*
|
|
857
1021
|
* const embedding = new Float64Array(768).fill(0.1);
|
|
858
|
-
* const elid = encodeElidLossless(embedding);
|
|
859
|
-
*
|
|
860
|
-
* if (isElidReversible(elid)) {
|
|
861
|
-
* const recovered = decodeElidToEmbedding(elid);
|
|
862
|
-
* console.log(recovered.length); // 768
|
|
863
|
-
* }
|
|
864
|
-
* ```
|
|
865
|
-
* @param {string} elid_str
|
|
866
|
-
* @returns {any}
|
|
867
|
-
*/
|
|
868
|
-
export function decodeElidToEmbedding(elid_str) {
|
|
869
|
-
const ptr0 = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
870
|
-
const len0 = WASM_VECTOR_LEN;
|
|
871
|
-
const ret = wasm.decodeElidToEmbedding(ptr0, len0);
|
|
872
|
-
if (ret[2]) {
|
|
873
|
-
throw takeFromExternrefTable0(ret[1]);
|
|
874
|
-
}
|
|
875
|
-
return takeFromExternrefTable0(ret[0]);
|
|
876
|
-
}
|
|
877
|
-
|
|
878
|
-
/**
|
|
879
|
-
* Encode an embedding vector to an ELID string.
|
|
880
|
-
*
|
|
881
|
-
* Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
|
|
882
|
-
* sortable identifier. The ELID preserves locality properties for efficient
|
|
883
|
-
* similarity search.
|
|
884
|
-
*
|
|
885
|
-
* # Parameters
|
|
886
|
-
*
|
|
887
|
-
* - `embedding`: Float64 array of embedding values (64-2048 dimensions)
|
|
888
|
-
* - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
|
|
889
|
-
*
|
|
890
|
-
* # Returns
|
|
891
|
-
*
|
|
892
|
-
* A base32hex-encoded ELID string suitable for storage and comparison.
|
|
893
|
-
*
|
|
894
|
-
* # JavaScript Example
|
|
895
1022
|
*
|
|
896
|
-
*
|
|
897
|
-
*
|
|
1023
|
+
* // Fit in 100 characters (e.g., for database column constraints)
|
|
1024
|
+
* const elid = encodeElidMaxLength(embedding, 100);
|
|
1025
|
+
* console.log(elid.length <= 100); // true
|
|
898
1026
|
*
|
|
899
|
-
* //
|
|
900
|
-
* const
|
|
901
|
-
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
902
|
-
* console.log(elid); // "012345abcdef..."
|
|
1027
|
+
* // Fit in 50 characters (more compression)
|
|
1028
|
+
* const shortElid = encodeElidMaxLength(embedding, 50);
|
|
903
1029
|
* ```
|
|
904
1030
|
* @param {Float64Array} embedding
|
|
905
|
-
* @param {
|
|
1031
|
+
* @param {number} max_chars
|
|
906
1032
|
* @returns {string}
|
|
907
1033
|
*/
|
|
908
|
-
export function
|
|
1034
|
+
export function encodeElidMaxLength(embedding, max_chars) {
|
|
909
1035
|
let deferred3_0;
|
|
910
1036
|
let deferred3_1;
|
|
911
1037
|
try {
|
|
912
1038
|
const ptr0 = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
|
|
913
1039
|
const len0 = WASM_VECTOR_LEN;
|
|
914
|
-
const ret = wasm.
|
|
1040
|
+
const ret = wasm.encodeElidMaxLength(ptr0, len0, max_chars);
|
|
915
1041
|
var ptr2 = ret[0];
|
|
916
1042
|
var len2 = ret[1];
|
|
917
1043
|
if (ret[3]) {
|
|
@@ -927,31 +1053,29 @@ export function encodeElid(embedding, profile) {
|
|
|
927
1053
|
}
|
|
928
1054
|
|
|
929
1055
|
/**
|
|
930
|
-
* Compute the
|
|
1056
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
931
1057
|
*
|
|
932
1058
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
1059
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
933
1060
|
*
|
|
934
1061
|
* # JavaScript Example
|
|
935
1062
|
*
|
|
936
1063
|
* ```javascript
|
|
937
|
-
* import {
|
|
938
|
-
*
|
|
939
|
-
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
940
|
-
* console.log(similarity); // ~0.9 (very similar)
|
|
1064
|
+
* import { jaroWinkler } from 'elid';
|
|
941
1065
|
*
|
|
942
|
-
* const
|
|
943
|
-
* console.log(
|
|
1066
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
1067
|
+
* console.log(similarity); // ~0.961
|
|
944
1068
|
* ```
|
|
945
1069
|
* @param {string} a
|
|
946
1070
|
* @param {string} b
|
|
947
1071
|
* @returns {number}
|
|
948
1072
|
*/
|
|
949
|
-
export function
|
|
1073
|
+
export function jaroWinkler(a, b) {
|
|
950
1074
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
951
1075
|
const len0 = WASM_VECTOR_LEN;
|
|
952
1076
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
953
1077
|
const len1 = WASM_VECTOR_LEN;
|
|
954
|
-
const ret = wasm.
|
|
1078
|
+
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
955
1079
|
return ret;
|
|
956
1080
|
}
|
|
957
1081
|
|
|
@@ -985,130 +1109,6 @@ export function hamming(a, b) {
|
|
|
985
1109
|
return ret === 0x100000001 ? undefined : ret;
|
|
986
1110
|
}
|
|
987
1111
|
|
|
988
|
-
/**
|
|
989
|
-
* Compute the best matching similarity between two strings.
|
|
990
|
-
*
|
|
991
|
-
* Runs multiple algorithms and returns the highest score.
|
|
992
|
-
*
|
|
993
|
-
* # JavaScript Example
|
|
994
|
-
*
|
|
995
|
-
* ```javascript
|
|
996
|
-
* import { bestMatch } from 'elid';
|
|
997
|
-
*
|
|
998
|
-
* const score = bestMatch("hello", "hallo");
|
|
999
|
-
* console.log(score); // ~0.8
|
|
1000
|
-
* ```
|
|
1001
|
-
* @param {string} a
|
|
1002
|
-
* @param {string} b
|
|
1003
|
-
* @returns {number}
|
|
1004
|
-
*/
|
|
1005
|
-
export function bestMatch(a, b) {
|
|
1006
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1007
|
-
const len0 = WASM_VECTOR_LEN;
|
|
1008
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1009
|
-
const len1 = WASM_VECTOR_LEN;
|
|
1010
|
-
const ret = wasm.bestMatch(ptr0, len0, ptr1, len1);
|
|
1011
|
-
return ret;
|
|
1012
|
-
}
|
|
1013
|
-
|
|
1014
|
-
/**
|
|
1015
|
-
* Compute the Jaro-Winkler similarity between two strings.
|
|
1016
|
-
*
|
|
1017
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
1018
|
-
* Gives more favorable ratings to strings with common prefixes.
|
|
1019
|
-
*
|
|
1020
|
-
* # JavaScript Example
|
|
1021
|
-
*
|
|
1022
|
-
* ```javascript
|
|
1023
|
-
* import { jaroWinkler } from 'elid';
|
|
1024
|
-
*
|
|
1025
|
-
* const similarity = jaroWinkler("martha", "marhta");
|
|
1026
|
-
* console.log(similarity); // ~0.961
|
|
1027
|
-
* ```
|
|
1028
|
-
* @param {string} a
|
|
1029
|
-
* @param {string} b
|
|
1030
|
-
* @returns {number}
|
|
1031
|
-
*/
|
|
1032
|
-
export function jaroWinkler(a, b) {
|
|
1033
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1034
|
-
const len0 = WASM_VECTOR_LEN;
|
|
1035
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1036
|
-
const len1 = WASM_VECTOR_LEN;
|
|
1037
|
-
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
1038
|
-
return ret;
|
|
1039
|
-
}
|
|
1040
|
-
|
|
1041
|
-
/**
|
|
1042
|
-
* Compute the normalized Levenshtein similarity between two strings.
|
|
1043
|
-
*
|
|
1044
|
-
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
1045
|
-
*
|
|
1046
|
-
* # JavaScript Example
|
|
1047
|
-
*
|
|
1048
|
-
* ```javascript
|
|
1049
|
-
* import { normalizedLevenshtein } from 'elid';
|
|
1050
|
-
*
|
|
1051
|
-
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
1052
|
-
* console.log(similarity); // ~0.8
|
|
1053
|
-
* ```
|
|
1054
|
-
* @param {string} a
|
|
1055
|
-
* @param {string} b
|
|
1056
|
-
* @returns {number}
|
|
1057
|
-
*/
|
|
1058
|
-
export function normalizedLevenshtein(a, b) {
|
|
1059
|
-
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1060
|
-
const len0 = WASM_VECTOR_LEN;
|
|
1061
|
-
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1062
|
-
const len1 = WASM_VECTOR_LEN;
|
|
1063
|
-
const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
|
|
1064
|
-
return ret;
|
|
1065
|
-
}
|
|
1066
|
-
|
|
1067
|
-
let cachedUint32ArrayMemory0 = null;
|
|
1068
|
-
|
|
1069
|
-
function getUint32ArrayMemory0() {
|
|
1070
|
-
if (cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0) {
|
|
1071
|
-
cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
|
|
1072
|
-
}
|
|
1073
|
-
return cachedUint32ArrayMemory0;
|
|
1074
|
-
}
|
|
1075
|
-
|
|
1076
|
-
function getArrayU32FromWasm0(ptr, len) {
|
|
1077
|
-
ptr = ptr >>> 0;
|
|
1078
|
-
return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
|
|
1079
|
-
}
|
|
1080
|
-
/**
|
|
1081
|
-
* Find all hashes within a given distance threshold.
|
|
1082
|
-
*
|
|
1083
|
-
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
1084
|
-
*
|
|
1085
|
-
* # JavaScript Example
|
|
1086
|
-
*
|
|
1087
|
-
* ```javascript
|
|
1088
|
-
* import { simhash, findSimilarHashes } from 'elid';
|
|
1089
|
-
*
|
|
1090
|
-
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
1091
|
-
* const hashes = candidates.map(s => simhash(s));
|
|
1092
|
-
*
|
|
1093
|
-
* const queryHash = simhash("iPhone 14");
|
|
1094
|
-
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
1095
|
-
*
|
|
1096
|
-
* console.log(matches); // [0, 1] - indices of similar items
|
|
1097
|
-
* ```
|
|
1098
|
-
* @param {number} query_hash
|
|
1099
|
-
* @param {Float64Array} candidate_hashes
|
|
1100
|
-
* @param {number} max_distance
|
|
1101
|
-
* @returns {Uint32Array}
|
|
1102
|
-
*/
|
|
1103
|
-
export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
|
|
1104
|
-
const ptr0 = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
|
|
1105
|
-
const len0 = WASM_VECTOR_LEN;
|
|
1106
|
-
const ret = wasm.findSimilarHashes(query_hash, ptr0, len0, max_distance);
|
|
1107
|
-
var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
|
|
1108
|
-
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
1109
|
-
return v2;
|
|
1110
|
-
}
|
|
1111
|
-
|
|
1112
1112
|
/**
|
|
1113
1113
|
* Dimension handling mode for full vector encoding.
|
|
1114
1114
|
*
|