elid 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/elid.d.ts CHANGED
@@ -1,58 +1,148 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
  /**
4
- * Compute the normalized SimHash similarity between two strings.
4
+ * Compute the OSA (Optimal String Alignment) distance between two strings.
5
5
  *
6
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
6
+ * Similar to Levenshtein but also considers transpositions as a single operation.
7
7
  *
8
8
  * # JavaScript Example
9
9
  *
10
10
  * ```javascript
11
- * import { simhashSimilarity } from 'elid';
11
+ * import { osaDistance } from 'elid';
12
12
  *
13
- * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
14
- * console.log(similarity); // ~0.9 (very similar)
13
+ * const distance = osaDistance("ca", "ac");
14
+ * console.log(distance); // 1 (transposition)
15
+ * ```
16
+ */
17
+ export function osaDistance(a: string, b: string): number;
18
+ /**
19
+ * Compute the Levenshtein distance between two strings.
15
20
  *
16
- * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
17
- * console.log(similarity2); // ~0.4 (different)
21
+ * Returns the minimum number of single-character edits needed to transform one string into another.
22
+ *
23
+ * # JavaScript Example
24
+ *
25
+ * ```javascript
26
+ * import { levenshtein } from 'elid';
27
+ *
28
+ * const distance = levenshtein("kitten", "sitting");
29
+ * console.log(distance); // 3
18
30
  * ```
19
31
  */
20
- export function simhashSimilarity(a: string, b: string): number;
32
+ export function levenshtein(a: string, b: string): number;
21
33
  /**
22
- * Compute the Jaro-Winkler similarity between two strings.
34
+ * Find all matches above a threshold score.
23
35
  *
24
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
25
- * Gives more favorable ratings to strings with common prefixes.
36
+ * Returns an array of objects with index and score for all candidates above the threshold.
26
37
  *
27
38
  * # JavaScript Example
28
39
  *
29
40
  * ```javascript
30
- * import { jaroWinkler } from 'elid';
41
+ * import { findMatchesAboveThreshold } from 'elid';
31
42
  *
32
- * const similarity = jaroWinkler("martha", "marhta");
33
- * console.log(similarity); // ~0.961
43
+ * const candidates = ["apple", "application", "apply", "banana"];
44
+ * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
45
+ * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
34
46
  * ```
35
47
  */
36
- export function jaroWinkler(a: string, b: string): number;
48
+ export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
37
49
  /**
38
- * Compute the Hamming distance between two strings.
50
+ * Compute the Hamming distance between two SimHash values.
39
51
  *
40
- * Returns the number of positions at which the characters differ.
41
- * Returns null if strings have different lengths.
52
+ * Returns the number of differing bits. Lower values = higher similarity.
42
53
  *
43
54
  * # JavaScript Example
44
55
  *
45
56
  * ```javascript
46
- * import { hamming } from 'elid';
57
+ * import { simhash, simhashDistance } from 'elid';
47
58
  *
48
- * const distance = hamming("karolin", "kathrin");
49
- * console.log(distance); // 3
59
+ * const hash1 = simhash("iPhone 14");
60
+ * const hash2 = simhash("iPhone 15");
61
+ * const distance = simhashDistance(hash1, hash2);
50
62
  *
51
- * const invalid = hamming("hello", "world!");
52
- * console.log(invalid); // null
63
+ * console.log(distance); // Low number = similar
53
64
  * ```
54
65
  */
55
- export function hamming(a: string, b: string): number | undefined;
66
+ export function simhashDistance(hash1: number, hash2: number): number;
67
+ /**
68
+ * Encode an embedding with a maximum output string length constraint.
69
+ *
70
+ * Calculates the optimal precision and dimension settings to fit within
71
+ * the specified character limit while maximizing fidelity.
72
+ *
73
+ * # Parameters
74
+ *
75
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
76
+ * - `max_chars`: Maximum output string length in characters
77
+ *
78
+ * # Returns
79
+ *
80
+ * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
81
+ *
82
+ * # JavaScript Example
83
+ *
84
+ * ```javascript
85
+ * import { encodeElidMaxLength } from 'elid';
86
+ *
87
+ * const embedding = new Float64Array(768).fill(0.1);
88
+ *
89
+ * // Fit in 100 characters (e.g., for database column constraints)
90
+ * const elid = encodeElidMaxLength(embedding, 100);
91
+ * console.log(elid.length <= 100); // true
92
+ *
93
+ * // Fit in 50 characters (more compression)
94
+ * const shortElid = encodeElidMaxLength(embedding, 50);
95
+ * ```
96
+ */
97
+ export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
98
+ /**
99
+ * Compute Levenshtein distance with custom options.
100
+ *
101
+ * # JavaScript Example
102
+ *
103
+ * ```javascript
104
+ * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
105
+ *
106
+ * const opts = new SimilarityOptions();
107
+ * opts.setCaseSensitive(false);
108
+ * opts.setTrimWhitespace(true);
109
+ *
110
+ * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
111
+ * console.log(distance); // 0
112
+ * ```
113
+ */
114
+ export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
115
+ /**
116
+ * Check if an ELID can be decoded back to an embedding.
117
+ *
118
+ * Returns true if the ELID was encoded with a FullVector profile
119
+ * (lossless, compressed, or max_length), false otherwise.
120
+ *
121
+ * # Parameters
122
+ *
123
+ * - `elid_str`: A valid ELID string (base32hex encoded)
124
+ *
125
+ * # Returns
126
+ *
127
+ * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
128
+ *
129
+ * # JavaScript Example
130
+ *
131
+ * ```javascript
132
+ * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
133
+ *
134
+ * const embedding = new Float64Array(768).fill(0.1);
135
+ *
136
+ * // Mini128 is NOT reversible
137
+ * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
138
+ * console.log(isElidReversible(mini128Elid)); // false
139
+ *
140
+ * // Lossless IS reversible
141
+ * const losslessElid = encodeElidLossless(embedding);
142
+ * console.log(isElidReversible(losslessElid)); // true
143
+ * ```
144
+ */
145
+ export function isElidReversible(elid_str: string): boolean;
56
146
  /**
57
147
  * Compute the SimHash fingerprint of a string.
58
148
  *
@@ -77,103 +167,324 @@ export function hamming(a: string, b: string): number | undefined;
77
167
  */
78
168
  export function simhash(text: string): number;
79
169
  /**
80
- * Compute the Levenshtein distance between two strings.
170
+ * Decode an ELID string to raw bytes.
81
171
  *
82
- * Returns the minimum number of single-character edits needed to transform one string into another.
172
+ * Returns the raw byte representation of an ELID, including the header
173
+ * and payload bytes. Useful for custom processing or debugging.
174
+ *
175
+ * # Parameters
176
+ *
177
+ * - `elid_str`: A valid ELID string (base32hex encoded)
178
+ *
179
+ * # Returns
180
+ *
181
+ * A Uint8Array containing the raw bytes (header + payload).
83
182
  *
84
183
  * # JavaScript Example
85
184
  *
86
185
  * ```javascript
87
- * import { levenshtein } from 'elid';
186
+ * import { decodeElid } from 'elid';
88
187
  *
89
- * const distance = levenshtein("kitten", "sitting");
90
- * console.log(distance); // 3
188
+ * const bytes = decodeElid("012345abcdef...");
189
+ * console.log(bytes); // Uint8Array [...]
91
190
  * ```
92
191
  */
93
- export function levenshtein(a: string, b: string): number;
192
+ export function decodeElid(elid_str: string): Uint8Array;
94
193
  /**
95
- * Find all hashes within a given distance threshold.
194
+ * Get metadata about a FullVector ELID.
96
195
  *
97
- * Useful for database queries - pre-compute hashes, then find similar ones.
196
+ * Returns an object containing information about how the ELID was encoded,
197
+ * including original dimensions, precision, and dimension mode.
198
+ *
199
+ * # Parameters
200
+ *
201
+ * - `elid_str`: A valid ELID string (base32hex encoded)
202
+ *
203
+ * # Returns
204
+ *
205
+ * An object with metadata fields, or null if not a FullVector ELID.
98
206
  *
99
207
  * # JavaScript Example
100
208
  *
101
209
  * ```javascript
102
- * import { simhash, findSimilarHashes } from 'elid';
210
+ * import { encodeElidCompressed, getElidMetadata } from 'elid';
103
211
  *
104
- * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
105
- * const hashes = candidates.map(s => simhash(s));
212
+ * const embedding = new Float64Array(768).fill(0.1);
213
+ * const elid = encodeElidCompressed(embedding, 0.5);
106
214
  *
107
- * const queryHash = simhash("iPhone 14");
108
- * const matches = findSimilarHashes(queryHash, hashes, 10);
215
+ * const meta = getElidMetadata(elid);
216
+ * if (meta) {
217
+ * console.log(meta.originalDims); // 768
218
+ * console.log(meta.encodedDims); // depends on compression
219
+ * console.log(meta.isLossless); // false
220
+ * }
221
+ * ```
222
+ */
223
+ export function getElidMetadata(elid_str: string): any;
224
+ /**
225
+ * Encode an embedding for cross-dimensional comparison.
109
226
  *
110
- * console.log(matches); // [0, 1] - indices of similar items
227
+ * Projects the embedding to a common dimension space, allowing comparison
228
+ * between embeddings of different original dimensions (e.g., 256d vs 768d).
229
+ *
230
+ * # Parameters
231
+ *
232
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
233
+ * - `common_dims`: Target dimension space (all vectors projected here)
234
+ *
235
+ * # Returns
236
+ *
237
+ * A base32hex-encoded ELID string.
238
+ *
239
+ * # JavaScript Example
240
+ *
241
+ * ```javascript
242
+ * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
243
+ *
244
+ * // Different sized embeddings from different models
245
+ * const embedding256 = new Float64Array(256).fill(0.1);
246
+ * const embedding768 = new Float64Array(768).fill(0.1);
247
+ *
248
+ * // Project both to 128-dim common space
249
+ * const elid1 = encodeElidCrossDimensional(embedding256, 128);
250
+ * const elid2 = encodeElidCrossDimensional(embedding768, 128);
251
+ *
252
+ * // Now they can be compared directly (both decode to 128 dims)
253
+ * const dec1 = decodeElidToEmbedding(elid1);
254
+ * const dec2 = decodeElidToEmbedding(elid2);
255
+ * // Both have length 128
111
256
  * ```
112
257
  */
113
- export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
258
+ export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
114
259
  /**
115
- * Find all matches above a threshold score.
260
+ * Compute the Hamming distance between two ELID strings.
116
261
  *
117
- * Returns an array of objects with index and score for all candidates above the threshold.
262
+ * Returns the number of differing bits between two Mini128 ELIDs.
263
+ * This distance is proportional to the angular distance between the
264
+ * original embeddings (lower = more similar).
265
+ *
266
+ * # Requirements
267
+ *
268
+ * Both ELIDs must use the Mini128 profile.
269
+ *
270
+ * # Parameters
271
+ *
272
+ * - `elid1`: First ELID string
273
+ * - `elid2`: Second ELID string
274
+ *
275
+ * # Returns
276
+ *
277
+ * Hamming distance (0-128). 0 means identical, 128 means completely different.
118
278
  *
119
279
  * # JavaScript Example
120
280
  *
121
281
  * ```javascript
122
- * import { findMatchesAboveThreshold } from 'elid';
282
+ * import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
123
283
  *
124
- * const candidates = ["apple", "application", "apply", "banana"];
125
- * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
126
- * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
284
+ * const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
285
+ * const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
286
+ *
287
+ * const distance = elidHammingDistance(elid1, elid2);
288
+ * if (distance < 20) {
289
+ * console.log("Very similar embeddings!");
290
+ * }
127
291
  * ```
128
292
  */
129
- export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
293
+ export function elidHammingDistance(elid1: string, elid2: string): number;
130
294
  /**
131
- * Compute the OSA (Optimal String Alignment) distance between two strings.
295
+ * Find the best match for a query string in an array of candidates.
132
296
  *
133
- * Similar to Levenshtein but also considers transpositions as a single operation.
297
+ * Returns an object with the index and similarity score of the best match.
134
298
  *
135
299
  * # JavaScript Example
136
300
  *
137
301
  * ```javascript
138
- * import { osaDistance } from 'elid';
302
+ * import { findBestMatch } from 'elid';
139
303
  *
140
- * const distance = osaDistance("ca", "ac");
141
- * console.log(distance); // 1 (transposition)
304
+ * const candidates = ["apple", "application", "apply"];
305
+ * const result = findBestMatch("app", candidates);
306
+ * console.log(result); // { index: 0, score: 0.907 }
142
307
  * ```
143
308
  */
144
- export function osaDistance(a: string, b: string): number;
309
+ export function findBestMatch(query: string, candidates: string[]): object;
145
310
  /**
146
- * Compute the normalized Levenshtein similarity between two strings.
311
+ * Encode an embedding with percentage-based compression.
312
+ *
313
+ * The retention percentage (0.0-1.0) controls how much information is preserved:
314
+ * - 1.0 = lossless (Full32 precision, all dimensions)
315
+ * - 0.5 = half precision and/or half dimensions
316
+ * - 0.25 = quarter precision and/or quarter dimensions
317
+ *
318
+ * The algorithm optimizes for dimension reduction first (which preserves
319
+ * more geometric relationships) before reducing precision.
320
+ *
321
+ * # Parameters
322
+ *
323
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
324
+ * - `retention_pct`: Information retention percentage (0.0-1.0)
325
+ *
326
+ * # Returns
327
+ *
328
+ * A base32hex-encoded ELID string.
329
+ *
330
+ * # JavaScript Example
331
+ *
332
+ * ```javascript
333
+ * import { encodeElidCompressed } from 'elid';
334
+ *
335
+ * const embedding = new Float64Array(768).fill(0.1);
336
+ *
337
+ * // 50% retention - good balance of size and fidelity
338
+ * const elid = encodeElidCompressed(embedding, 0.5);
339
+ *
340
+ * // 25% retention - smaller but less accurate
341
+ * const smallElid = encodeElidCompressed(embedding, 0.25);
342
+ * ```
343
+ */
344
+ export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
345
+ /**
346
+ * Compute the Jaro similarity between two strings.
147
347
  *
148
348
  * Returns a value between 0.0 (completely different) and 1.0 (identical).
349
+ * Particularly effective for short strings like names.
149
350
  *
150
351
  * # JavaScript Example
151
352
  *
152
353
  * ```javascript
153
- * import { normalizedLevenshtein } from 'elid';
354
+ * import { jaro } from 'elid';
154
355
  *
155
- * const similarity = normalizedLevenshtein("hello", "hallo");
156
- * console.log(similarity); // ~0.8
356
+ * const similarity = jaro("martha", "marhta");
357
+ * console.log(similarity); // ~0.944
157
358
  * ```
158
359
  */
159
- export function normalizedLevenshtein(a: string, b: string): number;
360
+ export function jaro(a: string, b: string): number;
160
361
  /**
161
- * Compute Levenshtein distance with custom options.
362
+ * Encode an embedding using lossless full vector encoding.
363
+ *
364
+ * Preserves the exact embedding values (32-bit float precision) and all dimensions.
365
+ * This produces the largest output but allows exact reconstruction.
366
+ *
367
+ * # Parameters
368
+ *
369
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
370
+ *
371
+ * # Returns
372
+ *
373
+ * A base32hex-encoded ELID string that can be decoded back to the original embedding.
162
374
  *
163
375
  * # JavaScript Example
164
376
  *
165
377
  * ```javascript
166
- * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
378
+ * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
167
379
  *
168
- * const opts = new SimilarityOptions();
169
- * opts.setCaseSensitive(false);
170
- * opts.setTrimWhitespace(true);
380
+ * const embedding = new Float64Array(768).fill(0.1);
381
+ * const elid = encodeElidLossless(embedding);
171
382
  *
172
- * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
173
- * console.log(distance); // 0
383
+ * // Later, recover the exact embedding
384
+ * const recovered = decodeElidToEmbedding(elid);
385
+ * // recovered is identical to embedding
174
386
  * ```
175
387
  */
176
- export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
388
+ export function encodeElidLossless(embedding: Float64Array): string;
389
+ /**
390
+ * Decode an ELID string back to an embedding vector.
391
+ *
392
+ * Only works for ELIDs encoded with a FullVector profile (lossless,
393
+ * compressed, or max_length). Returns null for non-reversible profiles
394
+ * like Mini128, Morton, or Hilbert.
395
+ *
396
+ * # Parameters
397
+ *
398
+ * - `elid_str`: A valid ELID string (base32hex encoded)
399
+ *
400
+ * # Returns
401
+ *
402
+ * A Float64Array containing the decoded embedding, or null if the ELID
403
+ * is not reversible.
404
+ *
405
+ * Note: If dimension reduction was used during encoding, the decoded
406
+ * embedding will be in the reduced dimension space, not the original.
407
+ *
408
+ * # JavaScript Example
409
+ *
410
+ * ```javascript
411
+ * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
412
+ *
413
+ * const embedding = new Float64Array(768).fill(0.1);
414
+ * const elid = encodeElidLossless(embedding);
415
+ *
416
+ * if (isElidReversible(elid)) {
417
+ * const recovered = decodeElidToEmbedding(elid);
418
+ * console.log(recovered.length); // 768
419
+ * }
420
+ * ```
421
+ */
422
+ export function decodeElidToEmbedding(elid_str: string): any;
423
+ /**
424
+ * Encode an embedding vector to an ELID string.
425
+ *
426
+ * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
427
+ * sortable identifier. The ELID preserves locality properties for efficient
428
+ * similarity search.
429
+ *
430
+ * # Parameters
431
+ *
432
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
433
+ * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
434
+ *
435
+ * # Returns
436
+ *
437
+ * A base32hex-encoded ELID string suitable for storage and comparison.
438
+ *
439
+ * # JavaScript Example
440
+ *
441
+ * ```javascript
442
+ * import { encodeElid, ElidProfile } from 'elid';
443
+ *
444
+ * // OpenAI embeddings are 1536 dimensions
445
+ * const embedding = await getEmbedding("Hello world");
446
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
447
+ * console.log(elid); // "012345abcdef..."
448
+ * ```
449
+ */
450
+ export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
451
+ /**
452
+ * Compute the normalized SimHash similarity between two strings.
453
+ *
454
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
455
+ *
456
+ * # JavaScript Example
457
+ *
458
+ * ```javascript
459
+ * import { simhashSimilarity } from 'elid';
460
+ *
461
+ * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
462
+ * console.log(similarity); // ~0.9 (very similar)
463
+ *
464
+ * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
465
+ * console.log(similarity2); // ~0.4 (different)
466
+ * ```
467
+ */
468
+ export function simhashSimilarity(a: string, b: string): number;
469
+ /**
470
+ * Compute the Hamming distance between two strings.
471
+ *
472
+ * Returns the number of positions at which the characters differ.
473
+ * Returns null if strings have different lengths.
474
+ *
475
+ * # JavaScript Example
476
+ *
477
+ * ```javascript
478
+ * import { hamming } from 'elid';
479
+ *
480
+ * const distance = hamming("karolin", "kathrin");
481
+ * console.log(distance); // 3
482
+ *
483
+ * const invalid = hamming("hello", "world!");
484
+ * console.log(invalid); // null
485
+ * ```
486
+ */
487
+ export function hamming(a: string, b: string): number | undefined;
177
488
  /**
178
489
  * Compute the best matching similarity between two strings.
179
490
  *
@@ -190,55 +501,146 @@ export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOption
190
501
  */
191
502
  export function bestMatch(a: string, b: string): number;
192
503
  /**
193
- * Compute the Hamming distance between two SimHash values.
504
+ * Compute the Jaro-Winkler similarity between two strings.
194
505
  *
195
- * Returns the number of differing bits. Lower values = higher similarity.
506
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
507
+ * Gives more favorable ratings to strings with common prefixes.
196
508
  *
197
509
  * # JavaScript Example
198
510
  *
199
511
  * ```javascript
200
- * import { simhash, simhashDistance } from 'elid';
201
- *
202
- * const hash1 = simhash("iPhone 14");
203
- * const hash2 = simhash("iPhone 15");
204
- * const distance = simhashDistance(hash1, hash2);
512
+ * import { jaroWinkler } from 'elid';
205
513
  *
206
- * console.log(distance); // Low number = similar
514
+ * const similarity = jaroWinkler("martha", "marhta");
515
+ * console.log(similarity); // ~0.961
207
516
  * ```
208
517
  */
209
- export function simhashDistance(hash1: number, hash2: number): number;
518
+ export function jaroWinkler(a: string, b: string): number;
210
519
  /**
211
- * Compute the Jaro similarity between two strings.
520
+ * Compute the normalized Levenshtein similarity between two strings.
212
521
  *
213
522
  * Returns a value between 0.0 (completely different) and 1.0 (identical).
214
- * Particularly effective for short strings like names.
215
523
  *
216
524
  * # JavaScript Example
217
525
  *
218
526
  * ```javascript
219
- * import { jaro } from 'elid';
527
+ * import { normalizedLevenshtein } from 'elid';
220
528
  *
221
- * const similarity = jaro("martha", "marhta");
222
- * console.log(similarity); // ~0.944
529
+ * const similarity = normalizedLevenshtein("hello", "hallo");
530
+ * console.log(similarity); // ~0.8
223
531
  * ```
224
532
  */
225
- export function jaro(a: string, b: string): number;
533
+ export function normalizedLevenshtein(a: string, b: string): number;
226
534
  /**
227
- * Find the best match for a query string in an array of candidates.
535
+ * Find all hashes within a given distance threshold.
228
536
  *
229
- * Returns an object with the index and similarity score of the best match.
537
+ * Useful for database queries - pre-compute hashes, then find similar ones.
230
538
  *
231
539
  * # JavaScript Example
232
540
  *
233
541
  * ```javascript
234
- * import { findBestMatch } from 'elid';
542
+ * import { simhash, findSimilarHashes } from 'elid';
235
543
  *
236
- * const candidates = ["apple", "application", "apply"];
237
- * const result = findBestMatch("app", candidates);
238
- * console.log(result); // { index: 0, score: 0.907 }
544
+ * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
545
+ * const hashes = candidates.map(s => simhash(s));
546
+ *
547
+ * const queryHash = simhash("iPhone 14");
548
+ * const matches = findSimilarHashes(queryHash, hashes, 10);
549
+ *
550
+ * console.log(matches); // [0, 1] - indices of similar items
239
551
  * ```
240
552
  */
241
- export function findBestMatch(query: string, candidates: string[]): object;
553
+ export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
554
+ /**
555
+ * Dimension handling mode for full vector encoding.
556
+ *
557
+ * Controls whether to preserve original dimensions, reduce them,
558
+ * or project to a common space for cross-dimensional comparison.
559
+ *
560
+ * # JavaScript Example
561
+ *
562
+ * ```javascript
563
+ * import { ElidDimensionMode, encodeElidFullVector } from 'elid';
564
+ *
565
+ * // Preserve all dimensions
566
+ * // Reduce to fewer dimensions for smaller output
567
+ * // Common space for comparing different-sized embeddings
568
+ * ```
569
+ */
570
+ export enum ElidDimensionMode {
571
+ /**
572
+ * Preserve all original dimensions (no projection)
573
+ */
574
+ Preserve = 0,
575
+ /**
576
+ * Reduce dimensions using random projection
577
+ */
578
+ Reduce = 1,
579
+ /**
580
+ * Project to common space for cross-dimensional comparison
581
+ */
582
+ Common = 2,
583
+ }
584
+ /**
585
+ * ELID encoding profile for vector embeddings.
586
+ *
587
+ * Choose a profile based on your use case:
588
+ * - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
589
+ * - `Morton10x10`: Z-order curve encoding, good for range queries
590
+ * - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
591
+ *
592
+ * # JavaScript Example
593
+ *
594
+ * ```javascript
595
+ * import { ElidProfile, encodeElid } from 'elid';
596
+ *
597
+ * const embedding = new Float64Array(768).fill(0.1);
598
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
599
+ * ```
600
+ */
601
+ export enum ElidProfile {
602
+ /**
603
+ * 128-bit SimHash (cosine similarity via Hamming distance)
604
+ */
605
+ Mini128 = 0,
606
+ /**
607
+ * Morton/Z-order curve encoding (10 dims, 10 bits each)
608
+ */
609
+ Morton10x10 = 1,
610
+ /**
611
+ * Hilbert curve encoding (10 dims, 10 bits each)
612
+ */
613
+ Hilbert10x10 = 2,
614
+ }
615
+ /**
616
+ * Precision options for full vector encoding.
617
+ *
618
+ * Controls how many bits are used to represent each dimension value.
619
+ * Higher precision means more accurate reconstruction but larger output.
620
+ *
621
+ * # JavaScript Example
622
+ *
623
+ * ```javascript
624
+ * import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
625
+ *
626
+ * const embedding = new Float64Array(768).fill(0.1);
627
+ * // Full32 = lossless, Half16 = smaller with minimal error
628
+ * ```
629
+ */
630
+ export enum ElidVectorPrecision {
631
+ /**
632
+ * Full 32-bit float (lossless, 4 bytes per dimension)
633
+ */
634
+ Full32 = 0,
635
+ /**
636
+ * 16-bit half-precision float (2 bytes per dimension)
637
+ */
638
+ Half16 = 1,
639
+ /**
640
+ * 8-bit quantized (1 byte per dimension, ~1% error)
641
+ */
642
+ Quant8 = 2,
643
+ }
242
644
  /**
243
645
  * Options for configuring string similarity algorithms
244
646
  */