elid 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -6
- package/elid.d.ts +489 -87
- package/elid_bg.js +780 -182
- package/elid_bg.wasm +0 -0
- package/package.json +5 -5
package/elid_bg.js
CHANGED
|
@@ -181,89 +181,270 @@ function handleError(f, args) {
|
|
|
181
181
|
}
|
|
182
182
|
}
|
|
183
183
|
/**
 * Optimal String Alignment (OSA) distance between two strings.
 *
 * Behaves like Levenshtein distance, except that swapping two characters
 * (a transposition) is counted as one operation.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { osaDistance } from 'elid';
 *
 * const distance = osaDistance("ca", "ac");
 * console.log(distance); // 1 (transposition)
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 */
export function osaDistance(a, b) {
    // WASM_VECTOR_LEN is read straight after each string is passed over, so
    // each length is paired with the pointer returned just before it.
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.osaDistance(aPtr, aLen, bPtr, bLen) >>> 0;
}
|
|
211
208
|
|
|
212
209
|
/**
 * Levenshtein distance between two strings.
 *
 * This is the minimum number of single-character edits needed to transform
 * one string into the other.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { levenshtein } from 'elid';
 *
 * const distance = levenshtein("kitten", "sitting");
 * console.log(distance); // 3
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 */
export function levenshtein(a, b) {
    // Each length is taken from WASM_VECTOR_LEN immediately after its string
    // has been passed over.
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.levenshtein(aPtr, aLen, bPtr, bLen) >>> 0;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Writes one externref-table index per array element into a buffer obtained
 * from `malloc`, and publishes the element count through WASM_VECTOR_LEN.
 *
 * @param {ArrayLike<any>} array Values to register with addToExternrefTable0.
 * @param {Function} malloc Allocator called as `malloc(byteSize, alignment)`.
 * @returns {number} Unsigned pointer to the start of the index buffer.
 */
function passArrayJsValueToWasm0(array, malloc) {
    // Four bytes per element, 4-byte aligned: each slot holds a uint32 index.
    const base = malloc(array.length * 4, 4) >>> 0;
    for (let index = 0; index < array.length; index += 1) {
        const tableIndex = addToExternrefTable0(array[index]);
        // The data view is fetched after the table insert, on every
        // iteration, rather than being cached across iterations.
        const view = getDataViewMemory0();
        view.setUint32(base + 4 * index, tableIndex, true); // little-endian
    }
    WASM_VECTOR_LEN = array.length;
    return base;
}
|
|
244
|
+
/**
 * Collect every candidate whose score against `query` is above `threshold`.
 *
 * The result is an array of objects carrying the candidate's index and its
 * score.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { findMatchesAboveThreshold } from 'elid';
 *
 * const candidates = ["apple", "application", "apply", "banana"];
 * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
 * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
 * ```
 * @param {string} query
 * @param {string[]} candidates
 * @param {number} threshold
 * @returns {any} Whatever the wasm export returns, passed through unchanged.
 */
export function findMatchesAboveThreshold(query, candidates, threshold) {
    const queryPtr = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const queryLen = WASM_VECTOR_LEN;
    const listPtr = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
    const listLen = WASM_VECTOR_LEN;
    return wasm.findMatchesAboveThreshold(queryPtr, queryLen, listPtr, listLen, threshold);
}
|
|
238
271
|
|
|
239
272
|
/**
 * Hamming distance between two SimHash values.
 *
 * The result is the number of differing bits; a lower value means the
 * hashed inputs are more similar.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { simhash, simhashDistance } from 'elid';
 *
 * const hash1 = simhash("iPhone 14");
 * const hash2 = simhash("iPhone 15");
 * const distance = simhashDistance(hash1, hash2);
 *
 * console.log(distance); // Low number = similar
 * ```
 * @param {number} hash1
 * @param {number} hash2
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 */
export function simhashDistance(hash1, hash2) {
    // Both hashes are forwarded as-is; only the result is normalised.
    return wasm.simhashDistance(hash1, hash2) >>> 0;
}
|
|
296
|
+
|
|
297
|
+
// Lazily created Float64Array view over wasm.memory.buffer.
let cachedFloat64ArrayMemory0 = null;

/**
 * Returns the shared Float64Array view over the wasm memory buffer,
 * (re)creating it when there is no cached view or the cached view reports a
 * byte length of zero.
 *
 * NOTE(review): a zero byteLength presumably means the underlying buffer was
 * detached (e.g. after wasm memory growth) — confirm.
 *
 * @returns {Float64Array}
 */
function getFloat64ArrayMemory0() {
    const needsRefresh =
        cachedFloat64ArrayMemory0 === null || cachedFloat64ArrayMemory0.byteLength === 0;
    if (needsRefresh) {
        cachedFloat64ArrayMemory0 = new Float64Array(wasm.memory.buffer);
    }
    return cachedFloat64ArrayMemory0;
}
|
|
305
|
+
|
|
306
|
+
/**
 * Copies `arg` into a buffer obtained from `malloc` (8 bytes per element,
 * 8-byte aligned) and publishes the element count through WASM_VECTOR_LEN.
 *
 * @param {ArrayLike<number>} arg Values to copy into the Float64 memory view.
 * @param {Function} malloc Allocator called as `malloc(byteSize, alignment)`.
 * @returns {number} Unsigned byte pointer to the copied data.
 */
function passArrayF64ToWasm0(arg, malloc) {
    const bytePtr = malloc(arg.length * 8, 8) >>> 0;
    // The Float64Array view is indexed in elements, so convert bytes -> f64 slots.
    const elementOffset = bytePtr / 8;
    getFloat64ArrayMemory0().set(arg, elementOffset);
    // Length is re-read after the copy (not cached before malloc), matching
    // the order in which the caller observes it.
    WASM_VECTOR_LEN = arg.length;
    return bytePtr;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Reads the value stored at `idx` in `wasm.__wbindgen_externrefs`, then
 * releases that slot with `wasm.__externref_table_dealloc`.
 *
 * @param {number} idx Index into the externref table.
 * @returns {any} The value that was stored at `idx`.
 */
function takeFromExternrefTable0(idx) {
    // Fetch first: the slot is deallocated before returning.
    const stored = wasm.__wbindgen_externrefs.get(idx);
    wasm.__externref_table_dealloc(idx);
    return stored;
}
|
|
318
|
+
/**
 * Encode an embedding so that the resulting string fits a maximum length.
 *
 * Precision and dimension settings are chosen to fit within the given
 * character limit while maximizing fidelity.
 *
 * # Parameters
 *
 * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
 * - `max_chars`: Maximum output string length in characters
 *
 * # Returns
 *
 * A base32hex-encoded ELID string of at most `max_chars` characters.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidMaxLength } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 *
 * // Fit in 100 characters (e.g., for database column constraints)
 * const elid = encodeElidMaxLength(embedding, 100);
 * console.log(elid.length <= 100); // true
 *
 * // Fit in 50 characters (more compression)
 * const shortElid = encodeElidMaxLength(embedding, 50);
 * ```
 * @param {Float64Array} embedding
 * @param {number} max_chars
 * @returns {string}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function encodeElidMaxLength(embedding, max_chars) {
    // Pointer/length of the result string; they stay undefined when no
    // string was produced, and are passed to __wbindgen_free either way.
    let textPtr;
    let textLen;
    try {
        const inputPtr = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
        const inputLen = WASM_VECTOR_LEN;
        const outcome = wasm.encodeElidMaxLength(inputPtr, inputLen, max_chars);
        // outcome = [string ptr, string len, error index, error flag]
        if (outcome[3]) {
            throw takeFromExternrefTable0(outcome[2]);
        }
        textPtr = outcome[0];
        textLen = outcome[1];
        return getStringFromWasm0(textPtr, textLen);
    } finally {
        wasm.__wbindgen_free(textPtr, textLen, 1);
    }
}
|
|
371
|
+
|
|
372
|
+
/**
 * Guard that `instance` is an instance of `klass`.
 *
 * @param {any} instance Value to check.
 * @param {Function} klass Constructor the value must be an instance of.
 * @returns {void}
 * @throws {Error} `expected instance of <klass.name>` when the check fails.
 */
function _assertClass(instance, klass) {
    if (instance instanceof klass) {
        return;
    }
    throw new Error(`expected instance of ${klass.name}`);
}
|
|
377
|
+
/**
 * Compute Levenshtein distance with custom options.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
 *
 * const opts = new SimilarityOptions();
 * opts.setCaseSensitive(false);
 * opts.setTrimWhitespace(true);
 *
 * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
 * console.log(distance); // 0
 * ```
 * @param {string} a
 * @param {string} b
 * @param {SimilarityOptions} opts
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 * @throws {Error} When `opts` is not a `SimilarityOptions` instance.
 */
export function levenshteinWithOpts(a, b, opts) {
    // Validate `opts` before passing any string over: if the check ran after
    // passStringToWasm0 (as it used to), a bad `opts` would throw with both
    // string buffers already requested from the wasm allocator and never
    // handed on to the wasm function.
    _assertClass(opts, SimilarityOptions);
    const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len0 = WASM_VECTOR_LEN;
    const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len1 = WASM_VECTOR_LEN;
    // NOTE(review): `__destroy_into_raw` presumably hands ownership of the
    // options object to wasm, so `opts` is likely unusable afterwards — confirm.
    const optsPtr = opts.__destroy_into_raw();
    const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, optsPtr);
    return ret >>> 0;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Report whether an ELID can be decoded back to an embedding.
 *
 * The answer is true when the ELID was encoded with a FullVector profile
 * (lossless, compressed, or max_length) and false otherwise.
 *
 * # Parameters
 *
 * - `elid_str`: A valid ELID string (base32hex encoded)
 *
 * # Returns
 *
 * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 *
 * // Mini128 is NOT reversible
 * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
 * console.log(isElidReversible(mini128Elid)); // false
 *
 * // Lossless IS reversible
 * const losslessElid = encodeElidLossless(embedding);
 * console.log(isElidReversible(losslessElid)); // true
 * ```
 * @param {string} elid_str
 * @returns {boolean}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function isElidReversible(elid_str) {
    const strPtr = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const strLen = WASM_VECTOR_LEN;
    // outcome = [numeric flag, error index, error flag]
    const outcome = wasm.isElidReversible(strPtr, strLen);
    const failed = outcome[2];
    if (failed) {
        throw takeFromExternrefTable0(outcome[1]);
    }
    // Any non-zero flag is reported as true.
    return outcome[0] !== 0;
}
|
|
268
449
|
|
|
269
450
|
/**
|
|
@@ -297,216 +478,511 @@ export function simhash(text) {
|
|
|
297
478
|
return ret;
|
|
298
479
|
}
|
|
299
480
|
|
|
481
|
+
/**
 * Returns a `subarray` of the Uint8Array memory view covering `len` bytes
 * starting at `ptr`.
 *
 * The result comes from `subarray`, not a copy; callers that need the bytes
 * to outlive the buffer must copy them.
 *
 * @param {number} ptr Byte offset; coerced to an unsigned 32-bit integer.
 * @param {number} len Number of bytes to expose.
 * @returns {Uint8Array}
 */
function getArrayU8FromWasm0(ptr, len) {
    const start = ptr >>> 0;
    // Element size is one byte, so byte offsets are used directly as indices.
    return getUint8ArrayMemory0().subarray(start, start + len);
}
|
|
300
485
|
/**
 * Decode an ELID string to raw bytes.
 *
 * Gives back the raw byte representation of an ELID, header and payload
 * included. Handy for custom processing or debugging.
 *
 * # Parameters
 *
 * - `elid_str`: A valid ELID string (base32hex encoded)
 *
 * # Returns
 *
 * A Uint8Array containing the raw bytes (header + payload).
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { decodeElid } from 'elid';
 *
 * const bytes = decodeElid("012345abcdef...");
 * console.log(bytes); // Uint8Array [...]
 * ```
 * @param {string} elid_str
 * @returns {Uint8Array}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function decodeElid(elid_str) {
    const strPtr = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const strLen = WASM_VECTOR_LEN;
    // outcome = [bytes ptr, bytes len, error index, error flag]
    const outcome = wasm.decodeElid(strPtr, strLen);
    if (outcome[3]) {
        throw takeFromExternrefTable0(outcome[2]);
    }
    // Copy the bytes out with slice() before the wasm-side buffer is freed.
    const bytes = getArrayU8FromWasm0(outcome[0], outcome[1]).slice();
    wasm.__wbindgen_free(outcome[0], outcome[1] * 1, 1);
    return bytes;
}
|
|
325
521
|
|
|
326
|
-
|
|
522
|
+
/**
 * Get metadata about a FullVector ELID.
 *
 * The returned object describes how the ELID was encoded, including
 * original dimensions, precision, and dimension mode.
 *
 * # Parameters
 *
 * - `elid_str`: A valid ELID string (base32hex encoded)
 *
 * # Returns
 *
 * An object with metadata fields, or null if not a FullVector ELID.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidCompressed, getElidMetadata } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 * const elid = encodeElidCompressed(embedding, 0.5);
 *
 * const meta = getElidMetadata(elid);
 * if (meta) {
 *   console.log(meta.originalDims); // 768
 *   console.log(meta.encodedDims);  // depends on compression
 *   console.log(meta.isLossless);   // false
 * }
 * ```
 * @param {string} elid_str
 * @returns {any}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function getElidMetadata(elid_str) {
    const strPtr = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const strLen = WASM_VECTOR_LEN;
    // outcome = [result index, error index, error flag]
    const outcome = wasm.getElidMetadata(strPtr, strLen);
    const failed = outcome[2];
    if (failed) {
        throw takeFromExternrefTable0(outcome[1]);
    }
    return takeFromExternrefTable0(outcome[0]);
}
|
|
327
563
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
564
|
+
/**
 * Encode an embedding for cross-dimensional comparison.
 *
 * The embedding is projected to a common dimension space, which allows
 * comparing embeddings whose original dimensions differ (e.g., 256d vs 768d).
 *
 * # Parameters
 *
 * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
 * - `common_dims`: Target dimension space (all vectors projected here)
 *
 * # Returns
 *
 * A base32hex-encoded ELID string.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
 *
 * // Different sized embeddings from different models
 * const embedding256 = new Float64Array(256).fill(0.1);
 * const embedding768 = new Float64Array(768).fill(0.1);
 *
 * // Project both to 128-dim common space
 * const elid1 = encodeElidCrossDimensional(embedding256, 128);
 * const elid2 = encodeElidCrossDimensional(embedding768, 128);
 *
 * // Now they can be compared directly (both decode to 128 dims)
 * const dec1 = decodeElidToEmbedding(elid1);
 * const dec2 = decodeElidToEmbedding(elid2);
 * // Both have length 128
 * ```
 * @param {Float64Array} embedding
 * @param {number} common_dims
 * @returns {string}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function encodeElidCrossDimensional(embedding, common_dims) {
    // Pointer/length of the result string; they stay undefined when no
    // string was produced, and are passed to __wbindgen_free either way.
    let textPtr;
    let textLen;
    try {
        const inputPtr = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
        const inputLen = WASM_VECTOR_LEN;
        const outcome = wasm.encodeElidCrossDimensional(inputPtr, inputLen, common_dims);
        // outcome = [string ptr, string len, error index, error flag]
        if (outcome[3]) {
            throw takeFromExternrefTable0(outcome[2]);
        }
        textPtr = outcome[0];
        textLen = outcome[1];
        return getStringFromWasm0(textPtr, textLen);
    } finally {
        wasm.__wbindgen_free(textPtr, textLen, 1);
    }
}
|
|
334
621
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
622
|
+
/**
 * Compute the Hamming distance between two ELID strings.
 *
 * Counts the differing bits between two Mini128 ELIDs. The distance is
 * proportional to the angular distance between the original embeddings
 * (lower = more similar).
 *
 * # Requirements
 *
 * Both ELIDs must use the Mini128 profile.
 *
 * # Parameters
 *
 * - `elid1`: First ELID string
 * - `elid2`: Second ELID string
 *
 * # Returns
 *
 * Hamming distance (0-128). 0 means identical, 128 means completely different.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
 *
 * const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
 * const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
 *
 * const distance = elidHammingDistance(elid1, elid2);
 * if (distance < 20) {
 *   console.log("Very similar embeddings!");
 * }
 * ```
 * @param {string} elid1
 * @param {string} elid2
 * @returns {number} The wasm result coerced to an unsigned 32-bit integer.
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function elidHammingDistance(elid1, elid2) {
    const firstPtr = passStringToWasm0(elid1, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const firstLen = WASM_VECTOR_LEN;
    const secondPtr = passStringToWasm0(elid2, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const secondLen = WASM_VECTOR_LEN;
    // outcome = [distance, error index, error flag]
    const outcome = wasm.elidHammingDistance(firstPtr, firstLen, secondPtr, secondLen);
    const failed = outcome[2];
    if (failed) {
        throw takeFromExternrefTable0(outcome[1]);
    }
    return outcome[0] >>> 0;
}
|
|
341
670
|
|
|
342
|
-
|
|
671
|
+
/**
 * Find the best match for a query string in an array of candidates.
 *
 * The result is an object holding the index and similarity score of the
 * best match.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { findBestMatch } from 'elid';
 *
 * const candidates = ["apple", "application", "apply"];
 * const result = findBestMatch("app", candidates);
 * console.log(result); // { index: 0, score: 0.907 }
 * ```
 * @param {string} query
 * @param {string[]} candidates
 * @returns {object} Whatever the wasm export returns, passed through unchanged.
 */
export function findBestMatch(query, candidates) {
    const queryPtr = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const queryLen = WASM_VECTOR_LEN;
    const listPtr = passArrayJsValueToWasm0(candidates, wasm.__wbindgen_malloc);
    const listLen = WASM_VECTOR_LEN;
    return wasm.findBestMatch(queryPtr, queryLen, listPtr, listLen);
}
|
|
343
697
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
698
|
+
/**
 * Encode an embedding with percentage-based compression.
 *
 * The retention percentage (0.0-1.0) controls how much information is preserved:
 * - 1.0 = lossless (Full32 precision, all dimensions)
 * - 0.5 = half precision and/or half dimensions
 * - 0.25 = quarter precision and/or quarter dimensions
 *
 * The algorithm optimizes for dimension reduction first (which preserves
 * more geometric relationships) before reducing precision.
 *
 * # Parameters
 *
 * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
 * - `retention_pct`: Information retention percentage (0.0-1.0)
 *
 * # Returns
 *
 * A base32hex-encoded ELID string.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidCompressed } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 *
 * // 50% retention - good balance of size and fidelity
 * const elid = encodeElidCompressed(embedding, 0.5);
 *
 * // 25% retention - smaller but less accurate
 * const smallElid = encodeElidCompressed(embedding, 0.25);
 * ```
 * @param {Float64Array} embedding
 * @param {number} retention_pct
 * @returns {string}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function encodeElidCompressed(embedding, retention_pct) {
    // Pointer/length of the result string; they stay undefined when no
    // string was produced, and are passed to __wbindgen_free either way.
    let textPtr;
    let textLen;
    try {
        const inputPtr = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
        const inputLen = WASM_VECTOR_LEN;
        const outcome = wasm.encodeElidCompressed(inputPtr, inputLen, retention_pct);
        // outcome = [string ptr, string len, error index, error flag]
        if (outcome[3]) {
            throw takeFromExternrefTable0(outcome[2]);
        }
        textPtr = outcome[0];
        textLen = outcome[1];
        return getStringFromWasm0(textPtr, textLen);
    } finally {
        wasm.__wbindgen_free(textPtr, textLen, 1);
    }
}
|
|
350
755
|
|
|
351
|
-
function getArrayU32FromWasm0(ptr, len) {
|
|
352
|
-
ptr = ptr >>> 0;
|
|
353
|
-
return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
|
|
354
|
-
}
|
|
355
756
|
/**
 * Jaro similarity between two strings.
 *
 * The score ranges from 0.0 (completely different) to 1.0 (identical) and
 * is particularly effective for short strings like names.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { jaro } from 'elid';
 *
 * const similarity = jaro("martha", "marhta");
 * console.log(similarity); // ~0.944
 * ```
 * @param {string} a
 * @param {string} b
 * @returns {number} The wasm result, returned unchanged.
 */
export function jaro(a, b) {
    const aPtr = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const aLen = WASM_VECTOR_LEN;
    const bPtr = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const bLen = WASM_VECTOR_LEN;
    return wasm.jaro(aPtr, aLen, bPtr, bLen);
}
|
|
782
|
+
|
|
783
|
+
/**
 * Encode an embedding using lossless full vector encoding.
 *
 * Keeps the exact embedding values (32-bit float precision) and every
 * dimension. This gives the largest output but allows exact reconstruction.
 *
 * # Parameters
 *
 * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
 *
 * # Returns
 *
 * A base32hex-encoded ELID string that can be decoded back to the original embedding.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 * const elid = encodeElidLossless(embedding);
 *
 * // Later, recover the exact embedding
 * const recovered = decodeElidToEmbedding(elid);
 * // recovered is identical to embedding
 * ```
 * @param {Float64Array} embedding
 * @returns {string}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function encodeElidLossless(embedding) {
    // Pointer/length of the result string; they stay undefined when no
    // string was produced, and are passed to __wbindgen_free either way.
    let textPtr;
    let textLen;
    try {
        const inputPtr = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
        const inputLen = WASM_VECTOR_LEN;
        const outcome = wasm.encodeElidLossless(inputPtr, inputLen);
        // outcome = [string ptr, string len, error index, error flag]
        if (outcome[3]) {
            throw takeFromExternrefTable0(outcome[2]);
        }
        textPtr = outcome[0];
        textLen = outcome[1];
        return getStringFromWasm0(textPtr, textLen);
    } finally {
        wasm.__wbindgen_free(textPtr, textLen, 1);
    }
}
|
|
832
|
+
|
|
396
833
|
/**
 * Decode an ELID string back to an embedding vector.
 *
 * Only works for ELIDs encoded with a FullVector profile (lossless,
 * compressed, or max_length). Non-reversible profiles such as Mini128,
 * Morton, or Hilbert yield null.
 *
 * # Parameters
 *
 * - `elid_str`: A valid ELID string (base32hex encoded)
 *
 * # Returns
 *
 * A Float64Array containing the decoded embedding, or null if the ELID
 * is not reversible.
 *
 * Note: If dimension reduction was used during encoding, the decoded
 * embedding will be in the reduced dimension space, not the original.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
 *
 * const embedding = new Float64Array(768).fill(0.1);
 * const elid = encodeElidLossless(embedding);
 *
 * if (isElidReversible(elid)) {
 *   const recovered = decodeElidToEmbedding(elid);
 *   console.log(recovered.length); // 768
 * }
 * ```
 * @param {string} elid_str
 * @returns {any}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function decodeElidToEmbedding(elid_str) {
    const strPtr = passStringToWasm0(elid_str, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const strLen = WASM_VECTOR_LEN;
    // outcome = [result index, error index, error flag]
    const outcome = wasm.decodeElidToEmbedding(strPtr, strLen);
    const failed = outcome[2];
    if (failed) {
        throw takeFromExternrefTable0(outcome[1]);
    }
    return takeFromExternrefTable0(outcome[0]);
}
|
|
423
877
|
|
|
424
878
|
/**
 * Encode an embedding vector to an ELID string.
 *
 * Turns a high-dimensional embedding (64-2048 dimensions) into a compact,
 * sortable identifier. The ELID preserves locality properties for efficient
 * similarity search.
 *
 * # Parameters
 *
 * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
 * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
 *
 * # Returns
 *
 * A base32hex-encoded ELID string suitable for storage and comparison.
 *
 * # JavaScript Example
 *
 * ```javascript
 * import { encodeElid, ElidProfile } from 'elid';
 *
 * // OpenAI embeddings are 1536 dimensions
 * const embedding = await getEmbedding("Hello world");
 * const elid = encodeElid(embedding, ElidProfile.Mini128);
 * console.log(elid); // "012345abcdef..."
 * ```
 * @param {Float64Array} embedding
 * @param {ElidProfile} profile
 * @returns {string}
 * @throws The value taken from the externref table when the wasm call flags an error.
 */
export function encodeElid(embedding, profile) {
    // Pointer/length of the result string; they stay undefined when no
    // string was produced, and are passed to __wbindgen_free either way.
    let textPtr;
    let textLen;
    try {
        const inputPtr = passArrayF64ToWasm0(embedding, wasm.__wbindgen_malloc);
        const inputLen = WASM_VECTOR_LEN;
        const outcome = wasm.encodeElid(inputPtr, inputLen, profile);
        // outcome = [string ptr, string len, error index, error flag]
        if (outcome[3]) {
            throw takeFromExternrefTable0(outcome[2]);
        }
        textPtr = outcome[0];
        textLen = outcome[1];
        return getStringFromWasm0(textPtr, textLen);
    } finally {
        wasm.__wbindgen_free(textPtr, textLen, 1);
    }
}
|
|
449
928
|
|
|
450
929
|
/**
|
|
451
|
-
* Compute the normalized
|
|
930
|
+
* Compute the normalized SimHash similarity between two strings.
|
|
452
931
|
*
|
|
453
932
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
454
933
|
*
|
|
455
934
|
* # JavaScript Example
|
|
456
935
|
*
|
|
457
936
|
* ```javascript
|
|
458
|
-
* import {
|
|
937
|
+
* import { simhashSimilarity } from 'elid';
|
|
459
938
|
*
|
|
460
|
-
* const similarity =
|
|
461
|
-
* console.log(similarity); // ~0.
|
|
939
|
+
* const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
|
|
940
|
+
* console.log(similarity); // ~0.9 (very similar)
|
|
941
|
+
*
|
|
942
|
+
* const similarity2 = simhashSimilarity("iPhone", "Galaxy");
|
|
943
|
+
* console.log(similarity2); // ~0.4 (different)
|
|
462
944
|
* ```
|
|
463
945
|
* @param {string} a
|
|
464
946
|
* @param {string} b
|
|
465
947
|
* @returns {number}
|
|
466
948
|
*/
|
|
467
|
-
export function
|
|
949
|
+
export function simhashSimilarity(a, b) {
|
|
468
950
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
469
951
|
const len0 = WASM_VECTOR_LEN;
|
|
470
952
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
471
953
|
const len1 = WASM_VECTOR_LEN;
|
|
472
|
-
const ret = wasm.
|
|
954
|
+
const ret = wasm.simhashSimilarity(ptr0, len0, ptr1, len1);
|
|
473
955
|
return ret;
|
|
474
956
|
}
|
|
475
957
|
|
|
476
|
-
function _assertClass(instance, klass) {
|
|
477
|
-
if (!(instance instanceof klass)) {
|
|
478
|
-
throw new Error(`expected instance of ${klass.name}`);
|
|
479
|
-
}
|
|
480
|
-
}
|
|
481
958
|
/**
|
|
482
|
-
* Compute
|
|
959
|
+
* Compute the Hamming distance between two strings.
|
|
960
|
+
*
|
|
961
|
+
* Returns the number of positions at which the characters differ.
|
|
962
|
+
* Returns null if strings have different lengths.
|
|
483
963
|
*
|
|
484
964
|
* # JavaScript Example
|
|
485
965
|
*
|
|
486
966
|
* ```javascript
|
|
487
|
-
* import {
|
|
967
|
+
* import { hamming } from 'elid';
|
|
488
968
|
*
|
|
489
|
-
* const
|
|
490
|
-
*
|
|
491
|
-
* opts.setTrimWhitespace(true);
|
|
969
|
+
* const distance = hamming("karolin", "kathrin");
|
|
970
|
+
* console.log(distance); // 3
|
|
492
971
|
*
|
|
493
|
-
* const
|
|
494
|
-
* console.log(
|
|
972
|
+
* const invalid = hamming("hello", "world!");
|
|
973
|
+
* console.log(invalid); // null
|
|
495
974
|
* ```
|
|
496
975
|
* @param {string} a
|
|
497
976
|
* @param {string} b
|
|
498
|
-
* @
|
|
499
|
-
* @returns {number}
|
|
977
|
+
* @returns {number | undefined}
|
|
500
978
|
*/
|
|
501
|
-
export function
|
|
979
|
+
export function hamming(a, b) {
|
|
502
980
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
503
981
|
const len0 = WASM_VECTOR_LEN;
|
|
504
982
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
505
983
|
const len1 = WASM_VECTOR_LEN;
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
const ret = wasm.levenshteinWithOpts(ptr0, len0, ptr1, len1, ptr2);
|
|
509
|
-
return ret >>> 0;
|
|
984
|
+
const ret = wasm.hamming(ptr0, len0, ptr1, len1);
|
|
985
|
+
return ret === 0x100000001 ? undefined : ret;
|
|
510
986
|
}
|
|
511
987
|
|
|
512
988
|
/**
|
|
@@ -536,84 +1012,197 @@ export function bestMatch(a, b) {
|
|
|
536
1012
|
}
|
|
537
1013
|
|
|
538
1014
|
/**
|
|
539
|
-
* Compute the
|
|
1015
|
+
* Compute the Jaro-Winkler similarity between two strings.
|
|
540
1016
|
*
|
|
541
|
-
* Returns
|
|
1017
|
+
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
1018
|
+
* Gives more favorable ratings to strings with common prefixes.
|
|
542
1019
|
*
|
|
543
1020
|
* # JavaScript Example
|
|
544
1021
|
*
|
|
545
1022
|
* ```javascript
|
|
546
|
-
* import {
|
|
547
|
-
*
|
|
548
|
-
* const hash1 = simhash("iPhone 14");
|
|
549
|
-
* const hash2 = simhash("iPhone 15");
|
|
550
|
-
* const distance = simhashDistance(hash1, hash2);
|
|
1023
|
+
* import { jaroWinkler } from 'elid';
|
|
551
1024
|
*
|
|
552
|
-
*
|
|
1025
|
+
* const similarity = jaroWinkler("martha", "marhta");
|
|
1026
|
+
* console.log(similarity); // ~0.961
|
|
553
1027
|
* ```
|
|
554
|
-
* @param {
|
|
555
|
-
* @param {
|
|
1028
|
+
* @param {string} a
|
|
1029
|
+
* @param {string} b
|
|
556
1030
|
* @returns {number}
|
|
557
1031
|
*/
|
|
558
|
-
export function
|
|
559
|
-
const
|
|
560
|
-
|
|
1032
|
+
export function jaroWinkler(a, b) {
|
|
1033
|
+
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1034
|
+
const len0 = WASM_VECTOR_LEN;
|
|
1035
|
+
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
1036
|
+
const len1 = WASM_VECTOR_LEN;
|
|
1037
|
+
const ret = wasm.jaroWinkler(ptr0, len0, ptr1, len1);
|
|
1038
|
+
return ret;
|
|
561
1039
|
}
|
|
562
1040
|
|
|
563
1041
|
/**
|
|
564
|
-
* Compute the
|
|
1042
|
+
* Compute the normalized Levenshtein similarity between two strings.
|
|
565
1043
|
*
|
|
566
1044
|
* Returns a value between 0.0 (completely different) and 1.0 (identical).
|
|
567
|
-
* Particularly effective for short strings like names.
|
|
568
1045
|
*
|
|
569
1046
|
* # JavaScript Example
|
|
570
1047
|
*
|
|
571
1048
|
* ```javascript
|
|
572
|
-
* import {
|
|
1049
|
+
* import { normalizedLevenshtein } from 'elid';
|
|
573
1050
|
*
|
|
574
|
-
* const similarity =
|
|
575
|
-
* console.log(similarity); // ~0.
|
|
1051
|
+
* const similarity = normalizedLevenshtein("hello", "hallo");
|
|
1052
|
+
* console.log(similarity); // ~0.8
|
|
576
1053
|
* ```
|
|
577
1054
|
* @param {string} a
|
|
578
1055
|
* @param {string} b
|
|
579
1056
|
* @returns {number}
|
|
580
1057
|
*/
|
|
581
|
-
export function
|
|
1058
|
+
export function normalizedLevenshtein(a, b) {
|
|
582
1059
|
const ptr0 = passStringToWasm0(a, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
583
1060
|
const len0 = WASM_VECTOR_LEN;
|
|
584
1061
|
const ptr1 = passStringToWasm0(b, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
585
1062
|
const len1 = WASM_VECTOR_LEN;
|
|
586
|
-
const ret = wasm.
|
|
1063
|
+
const ret = wasm.normalizedLevenshtein(ptr0, len0, ptr1, len1);
|
|
587
1064
|
return ret;
|
|
588
1065
|
}
|
|
589
1066
|
|
|
1067
|
+
let cachedUint32ArrayMemory0 = null;
|
|
1068
|
+
|
|
1069
|
+
function getUint32ArrayMemory0() {
|
|
1070
|
+
if (cachedUint32ArrayMemory0 === null || cachedUint32ArrayMemory0.byteLength === 0) {
|
|
1071
|
+
cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
|
|
1072
|
+
}
|
|
1073
|
+
return cachedUint32ArrayMemory0;
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
function getArrayU32FromWasm0(ptr, len) {
|
|
1077
|
+
ptr = ptr >>> 0;
|
|
1078
|
+
return getUint32ArrayMemory0().subarray(ptr / 4, ptr / 4 + len);
|
|
1079
|
+
}
|
|
590
1080
|
/**
|
|
591
|
-
* Find
|
|
1081
|
+
* Find all hashes within a given distance threshold.
|
|
592
1082
|
*
|
|
593
|
-
*
|
|
1083
|
+
* Useful for database queries - pre-compute hashes, then find similar ones.
|
|
594
1084
|
*
|
|
595
1085
|
* # JavaScript Example
|
|
596
1086
|
*
|
|
597
1087
|
* ```javascript
|
|
598
|
-
* import {
|
|
1088
|
+
* import { simhash, findSimilarHashes } from 'elid';
|
|
599
1089
|
*
|
|
600
|
-
* const candidates = ["
|
|
601
|
-
* const
|
|
602
|
-
*
|
|
1090
|
+
* const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
|
|
1091
|
+
* const hashes = candidates.map(s => simhash(s));
|
|
1092
|
+
*
|
|
1093
|
+
* const queryHash = simhash("iPhone 14");
|
|
1094
|
+
* const matches = findSimilarHashes(queryHash, hashes, 10);
|
|
1095
|
+
*
|
|
1096
|
+
* console.log(matches); // [0, 1] - indices of similar items
|
|
603
1097
|
* ```
|
|
604
|
-
* @param {
|
|
605
|
-
* @param {
|
|
606
|
-
* @
|
|
1098
|
+
* @param {number} query_hash
|
|
1099
|
+
* @param {Float64Array} candidate_hashes
|
|
1100
|
+
* @param {number} max_distance
|
|
1101
|
+
* @returns {Uint32Array}
|
|
607
1102
|
*/
|
|
608
|
-
export function
|
|
609
|
-
const ptr0 =
|
|
1103
|
+
export function findSimilarHashes(query_hash, candidate_hashes, max_distance) {
|
|
1104
|
+
const ptr0 = passArrayF64ToWasm0(candidate_hashes, wasm.__wbindgen_malloc);
|
|
610
1105
|
const len0 = WASM_VECTOR_LEN;
|
|
611
|
-
const
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
return
|
|
1106
|
+
const ret = wasm.findSimilarHashes(query_hash, ptr0, len0, max_distance);
|
|
1107
|
+
var v2 = getArrayU32FromWasm0(ret[0], ret[1]).slice();
|
|
1108
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
1109
|
+
return v2;
|
|
615
1110
|
}
|
|
616
1111
|
|
|
1112
|
+
/**
|
|
1113
|
+
* Dimension handling mode for full vector encoding.
|
|
1114
|
+
*
|
|
1115
|
+
* Controls whether to preserve original dimensions, reduce them,
|
|
1116
|
+
* or project to a common space for cross-dimensional comparison.
|
|
1117
|
+
*
|
|
1118
|
+
* # JavaScript Example
|
|
1119
|
+
*
|
|
1120
|
+
* ```javascript
|
|
1121
|
+
* import { ElidDimensionMode, encodeElidFullVector } from 'elid';
|
|
1122
|
+
*
|
|
1123
|
+
* // Preserve all dimensions
|
|
1124
|
+
* // Reduce to fewer dimensions for smaller output
|
|
1125
|
+
* // Common space for comparing different-sized embeddings
|
|
1126
|
+
* ```
|
|
1127
|
+
* @enum {0 | 1 | 2}
|
|
1128
|
+
*/
|
|
1129
|
+
export const ElidDimensionMode = Object.freeze({
|
|
1130
|
+
/**
|
|
1131
|
+
* Preserve all original dimensions (no projection)
|
|
1132
|
+
*/
|
|
1133
|
+
Preserve: 0, "0": "Preserve",
|
|
1134
|
+
/**
|
|
1135
|
+
* Reduce dimensions using random projection
|
|
1136
|
+
*/
|
|
1137
|
+
Reduce: 1, "1": "Reduce",
|
|
1138
|
+
/**
|
|
1139
|
+
* Project to common space for cross-dimensional comparison
|
|
1140
|
+
*/
|
|
1141
|
+
Common: 2, "2": "Common",
|
|
1142
|
+
});
|
|
1143
|
+
/**
|
|
1144
|
+
* ELID encoding profile for vector embeddings.
|
|
1145
|
+
*
|
|
1146
|
+
* Choose a profile based on your use case:
|
|
1147
|
+
* - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
|
|
1148
|
+
* - `Morton10x10`: Z-order curve encoding, good for range queries
|
|
1149
|
+
* - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
|
|
1150
|
+
*
|
|
1151
|
+
* # JavaScript Example
|
|
1152
|
+
*
|
|
1153
|
+
* ```javascript
|
|
1154
|
+
* import { ElidProfile, encodeElid } from 'elid';
|
|
1155
|
+
*
|
|
1156
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1157
|
+
* const elid = encodeElid(embedding, ElidProfile.Mini128);
|
|
1158
|
+
* ```
|
|
1159
|
+
* @enum {0 | 1 | 2}
|
|
1160
|
+
*/
|
|
1161
|
+
export const ElidProfile = Object.freeze({
|
|
1162
|
+
/**
|
|
1163
|
+
* 128-bit SimHash (cosine similarity via Hamming distance)
|
|
1164
|
+
*/
|
|
1165
|
+
Mini128: 0, "0": "Mini128",
|
|
1166
|
+
/**
|
|
1167
|
+
* Morton/Z-order curve encoding (10 dims, 10 bits each)
|
|
1168
|
+
*/
|
|
1169
|
+
Morton10x10: 1, "1": "Morton10x10",
|
|
1170
|
+
/**
|
|
1171
|
+
* Hilbert curve encoding (10 dims, 10 bits each)
|
|
1172
|
+
*/
|
|
1173
|
+
Hilbert10x10: 2, "2": "Hilbert10x10",
|
|
1174
|
+
});
|
|
1175
|
+
/**
|
|
1176
|
+
* Precision options for full vector encoding.
|
|
1177
|
+
*
|
|
1178
|
+
* Controls how many bits are used to represent each dimension value.
|
|
1179
|
+
* Higher precision means more accurate reconstruction but larger output.
|
|
1180
|
+
*
|
|
1181
|
+
* # JavaScript Example
|
|
1182
|
+
*
|
|
1183
|
+
* ```javascript
|
|
1184
|
+
* import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
|
|
1185
|
+
*
|
|
1186
|
+
* const embedding = new Float64Array(768).fill(0.1);
|
|
1187
|
+
* // Full32 = lossless, Half16 = smaller with minimal error
|
|
1188
|
+
* ```
|
|
1189
|
+
* @enum {0 | 1 | 2}
|
|
1190
|
+
*/
|
|
1191
|
+
export const ElidVectorPrecision = Object.freeze({
|
|
1192
|
+
/**
|
|
1193
|
+
* Full 32-bit float (lossless, 4 bytes per dimension)
|
|
1194
|
+
*/
|
|
1195
|
+
Full32: 0, "0": "Full32",
|
|
1196
|
+
/**
|
|
1197
|
+
* 16-bit half-precision float (2 bytes per dimension)
|
|
1198
|
+
*/
|
|
1199
|
+
Half16: 1, "1": "Half16",
|
|
1200
|
+
/**
|
|
1201
|
+
* 8-bit quantized (1 byte per dimension, ~1% error)
|
|
1202
|
+
*/
|
|
1203
|
+
Quant8: 2, "2": "Quant8",
|
|
1204
|
+
});
|
|
1205
|
+
|
|
617
1206
|
const SimilarityOptionsFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
618
1207
|
? { register: () => {}, unregister: () => {} }
|
|
619
1208
|
: new FinalizationRegistry(ptr => wasm.__wbg_similarityoptions_free(ptr >>> 0, 1));
|
|
@@ -757,6 +1346,11 @@ export function __wbg_new_e17d9f43105b08be() {
|
|
|
757
1346
|
return ret;
|
|
758
1347
|
};
|
|
759
1348
|
|
|
1349
|
+
export function __wbg_new_with_length_cd045ed0a87d4dd6(arg0) {
|
|
1350
|
+
const ret = new Float64Array(arg0 >>> 0);
|
|
1351
|
+
return ret;
|
|
1352
|
+
};
|
|
1353
|
+
|
|
760
1354
|
export function __wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) {
|
|
761
1355
|
arg0[arg1] = arg2;
|
|
762
1356
|
};
|
|
@@ -775,6 +1369,10 @@ export function __wbg_set_c2abbebe8b9ebee1() { return handleError(function (arg0
|
|
|
775
1369
|
return ret;
|
|
776
1370
|
}, arguments) };
|
|
777
1371
|
|
|
1372
|
+
export function __wbg_set_index_a0c01b257dd824f8(arg0, arg1, arg2) {
|
|
1373
|
+
arg0[arg1 >>> 0] = arg2;
|
|
1374
|
+
};
|
|
1375
|
+
|
|
778
1376
|
export function __wbindgen_cast_2241b6af4c4b2941(arg0, arg1) {
|
|
779
1377
|
// Cast intrinsic for `Ref(String) -> Externref`.
|
|
780
1378
|
const ret = getStringFromWasm0(arg0, arg1);
|