elid 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/elid.d.ts CHANGED
@@ -1,147 +1,204 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
  /**
4
- * Compute the normalized SimHash similarity between two strings.
4
+ * Compute the SimHash fingerprint of a string.
5
5
  *
6
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
6
+ * Returns a 64-bit hash where similar strings produce similar numbers.
7
+ * Use this for database queries by storing the hash and querying by numeric range.
7
8
  *
8
9
  * # JavaScript Example
9
10
  *
10
11
  * ```javascript
11
- * import { simhashSimilarity } from 'elid';
12
+ * import { simhash } from 'elid';
12
13
  *
13
- * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
14
- * console.log(similarity); // ~0.9 (very similar)
14
+ * const hash1 = simhash("iPhone 14");
15
+ * const hash2 = simhash("iPhone 15");
16
+ * const hash3 = simhash("Galaxy S23");
15
17
  *
16
- * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
17
- * console.log(similarity2); // ~0.4 (different)
18
+ * // hash1 and hash2 will be numerically close
19
+ * // hash3 will be numerically distant
20
+ *
21
+ * // Store in database as bigint:
22
+ * // { name: "iPhone 14", simhash: hash1 }
18
23
  * ```
19
24
  */
20
- export function simhashSimilarity(a: string, b: string): number;
25
+ export function simhash(text: string): number;
21
26
  /**
22
- * Compute the Jaro-Winkler similarity between two strings.
27
+ * Find all matches above a threshold score.
23
28
  *
24
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
25
- * Gives more favorable ratings to strings with common prefixes.
29
+ * Returns an array of objects with index and score for all candidates above the threshold.
26
30
  *
27
31
  * # JavaScript Example
28
32
  *
29
33
  * ```javascript
30
- * import { jaroWinkler } from 'elid';
34
+ * import { findMatchesAboveThreshold } from 'elid';
31
35
  *
32
- * const similarity = jaroWinkler("martha", "marhta");
33
- * console.log(similarity); // ~0.961
36
+ * const candidates = ["apple", "application", "apply", "banana"];
37
+ * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
38
+ * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
34
39
  * ```
35
40
  */
36
- export function jaroWinkler(a: string, b: string): number;
41
+ export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
37
42
  /**
38
- * Compute the Hamming distance between two strings.
43
+ * Encode an embedding for cross-dimensional comparison.
39
44
  *
40
- * Returns the number of positions at which the characters differ.
41
- * Returns null if strings have different lengths.
45
+ * Projects the embedding to a common dimension space, allowing comparison
46
+ * between embeddings of different original dimensions (e.g., 256d vs 768d).
47
+ *
48
+ * # Parameters
49
+ *
50
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
51
+ * - `common_dims`: Target dimension space (all vectors projected here)
52
+ *
53
+ * # Returns
54
+ *
55
+ * A base32hex-encoded ELID string.
42
56
  *
43
57
  * # JavaScript Example
44
58
  *
45
59
  * ```javascript
46
- * import { hamming } from 'elid';
60
+ * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
47
61
  *
48
- * const distance = hamming("karolin", "kathrin");
49
- * console.log(distance); // 3
62
+ * // Different sized embeddings from different models
63
+ * const embedding256 = new Float64Array(256).fill(0.1);
64
+ * const embedding768 = new Float64Array(768).fill(0.1);
50
65
  *
51
- * const invalid = hamming("hello", "world!");
52
- * console.log(invalid); // null
66
+ * // Project both to 128-dim common space
67
+ * const elid1 = encodeElidCrossDimensional(embedding256, 128);
68
+ * const elid2 = encodeElidCrossDimensional(embedding768, 128);
69
+ *
70
+ * // Now they can be compared directly (both decode to 128 dims)
71
+ * const dec1 = decodeElidToEmbedding(elid1);
72
+ * const dec2 = decodeElidToEmbedding(elid2);
73
+ * // Both have length 128
53
74
  * ```
54
75
  */
55
- export function hamming(a: string, b: string): number | undefined;
76
+ export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
56
77
  /**
57
- * Compute the SimHash fingerprint of a string.
78
+ * Find all hashes within a given distance threshold.
58
79
  *
59
- * Returns a 64-bit hash where similar strings produce similar numbers.
60
- * Use this for database queries by storing the hash and querying by numeric range.
80
+ * Useful for database queries - pre-compute hashes, then find similar ones.
61
81
  *
62
82
  * # JavaScript Example
63
83
  *
64
84
  * ```javascript
65
- * import { simhash } from 'elid';
85
+ * import { simhash, findSimilarHashes } from 'elid';
66
86
  *
67
- * const hash1 = simhash("iPhone 14");
68
- * const hash2 = simhash("iPhone 15");
69
- * const hash3 = simhash("Galaxy S23");
87
+ * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
88
+ * const hashes = candidates.map(s => simhash(s));
70
89
  *
71
- * // hash1 and hash2 will be numerically close
72
- * // hash3 will be numerically distant
90
+ * const queryHash = simhash("iPhone 14");
91
+ * const matches = findSimilarHashes(queryHash, hashes, 10);
73
92
  *
74
- * // Store in database as bigint:
75
- * // { name: "iPhone 14", simhash: hash1 }
93
+ * console.log(matches); // [0, 1] - indices of similar items
76
94
  * ```
77
95
  */
78
- export function simhash(text: string): number;
96
+ export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
79
97
  /**
80
- * Compute the Levenshtein distance between two strings.
98
+ * Compute the Jaro similarity between two strings.
81
99
  *
82
- * Returns the minimum number of single-character edits needed to transform one string into another.
100
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
101
+ * Particularly effective for short strings like names.
83
102
  *
84
103
  * # JavaScript Example
85
104
  *
86
105
  * ```javascript
87
- * import { levenshtein } from 'elid';
106
+ * import { jaro } from 'elid';
88
107
  *
89
- * const distance = levenshtein("kitten", "sitting");
90
- * console.log(distance); // 3
108
+ * const similarity = jaro("martha", "marhta");
109
+ * console.log(similarity); // ~0.944
91
110
  * ```
92
111
  */
93
- export function levenshtein(a: string, b: string): number;
112
+ export function jaro(a: string, b: string): number;
94
113
  /**
95
- * Find all hashes within a given distance threshold.
114
+ * Compute the best matching similarity between two strings.
96
115
  *
97
- * Useful for database queries - pre-compute hashes, then find similar ones.
116
+ * Runs multiple algorithms and returns the highest score.
98
117
  *
99
118
  * # JavaScript Example
100
119
  *
101
120
  * ```javascript
102
- * import { simhash, findSimilarHashes } from 'elid';
121
+ * import { bestMatch } from 'elid';
103
122
  *
104
- * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
105
- * const hashes = candidates.map(s => simhash(s));
123
+ * const score = bestMatch("hello", "hallo");
124
+ * console.log(score); // ~0.8
125
+ * ```
126
+ */
127
+ export function bestMatch(a: string, b: string): number;
128
+ /**
129
+ * Encode an embedding vector to an ELID string.
106
130
  *
107
- * const queryHash = simhash("iPhone 14");
108
- * const matches = findSimilarHashes(queryHash, hashes, 10);
131
+ * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
132
+ * sortable identifier. The ELID preserves locality properties for efficient
133
+ * similarity search.
109
134
  *
110
- * console.log(matches); // [0, 1] - indices of similar items
135
+ * # Parameters
136
+ *
137
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
138
+ * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
139
+ *
140
+ * # Returns
141
+ *
142
+ * A base32hex-encoded ELID string suitable for storage and comparison.
143
+ *
144
+ * # JavaScript Example
145
+ *
146
+ * ```javascript
147
+ * import { encodeElid, ElidProfile } from 'elid';
148
+ *
149
+ * // OpenAI embeddings are 1536 dimensions
150
+ * const embedding = await getEmbedding("Hello world");
151
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
152
+ * console.log(elid); // "012345abcdef..."
111
153
  * ```
112
154
  */
113
- export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
155
+ export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
114
156
  /**
115
- * Find all matches above a threshold score.
157
+ * Compute the normalized SimHash similarity between two strings.
116
158
  *
117
- * Returns an array of objects with index and score for all candidates above the threshold.
159
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
118
160
  *
119
161
  * # JavaScript Example
120
162
  *
121
163
  * ```javascript
122
- * import { findMatchesAboveThreshold } from 'elid';
164
+ * import { simhashSimilarity } from 'elid';
123
165
  *
124
- * const candidates = ["apple", "application", "apply", "banana"];
125
- * const matches = findMatchesAboveThreshold("app", candidates, 0.5);
126
- * console.log(matches); // [{ index: 0, score: 0.907 }, { index: 1, score: 0.830 }, ...]
166
+ * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
167
+ * console.log(similarity); // ~0.9 (very similar)
168
+ *
169
+ * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
170
+ * console.log(similarity2); // ~0.4 (different)
127
171
  * ```
128
172
  */
129
- export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
173
+ export function simhashSimilarity(a: string, b: string): number;
130
174
  /**
131
- * Compute the OSA (Optimal String Alignment) distance between two strings.
175
+ * Encode an embedding using lossless full vector encoding.
132
176
  *
133
- * Similar to Levenshtein but also considers transpositions as a single operation.
177
+ * Preserves the exact embedding values (32-bit float precision) and all dimensions.
178
+ * This produces the largest output but allows exact reconstruction.
179
+ *
180
+ * # Parameters
181
+ *
182
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
183
+ *
184
+ * # Returns
185
+ *
186
+ * A base32hex-encoded ELID string that can be decoded back to the original embedding.
134
187
  *
135
188
  * # JavaScript Example
136
189
  *
137
190
  * ```javascript
138
- * import { osaDistance } from 'elid';
191
+ * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
139
192
  *
140
- * const distance = osaDistance("ca", "ac");
141
- * console.log(distance); // 1 (transposition)
193
+ * const embedding = new Float64Array(768).fill(0.1);
194
+ * const elid = encodeElidLossless(embedding);
195
+ *
196
+ * // Later, recover the exact embedding
197
+ * const recovered = decodeElidToEmbedding(elid);
198
+ * // recovered is identical to embedding
142
199
  * ```
143
200
  */
144
- export function osaDistance(a: string, b: string): number;
201
+ export function encodeElidLossless(embedding: Float64Array): string;
145
202
  /**
146
203
  * Compute the normalized Levenshtein similarity between two strings.
147
204
  *
@@ -157,6 +214,141 @@ export function osaDistance(a: string, b: string): number;
157
214
  * ```
158
215
  */
159
216
  export function normalizedLevenshtein(a: string, b: string): number;
217
+ /**
218
+ * Decode an ELID string back to an embedding vector.
219
+ *
220
+ * Only works for ELIDs encoded with a FullVector profile (lossless,
221
+ * compressed, or max_length). Returns null for non-reversible profiles
222
+ * like Mini128, Morton, or Hilbert.
223
+ *
224
+ * # Parameters
225
+ *
226
+ * - `elid_str`: A valid ELID string (base32hex encoded)
227
+ *
228
+ * # Returns
229
+ *
230
+ * A Float64Array containing the decoded embedding, or null if the ELID
231
+ * is not reversible.
232
+ *
233
+ * Note: If dimension reduction was used during encoding, the decoded
234
+ * embedding will be in the reduced dimension space, not the original.
235
+ *
236
+ * # JavaScript Example
237
+ *
238
+ * ```javascript
239
+ * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
240
+ *
241
+ * const embedding = new Float64Array(768).fill(0.1);
242
+ * const elid = encodeElidLossless(embedding);
243
+ *
244
+ * if (isElidReversible(elid)) {
245
+ * const recovered = decodeElidToEmbedding(elid);
246
+ * console.log(recovered.length); // 768
247
+ * }
248
+ * ```
249
+ */
250
+ export function decodeElidToEmbedding(elid_str: string): any;
251
+ /**
252
+ * Get metadata about a FullVector ELID.
253
+ *
254
+ * Returns an object containing information about how the ELID was encoded,
255
+ * including original dimensions, precision, and dimension mode.
256
+ *
257
+ * # Parameters
258
+ *
259
+ * - `elid_str`: A valid ELID string (base32hex encoded)
260
+ *
261
+ * # Returns
262
+ *
263
+ * An object with metadata fields, or null if not a FullVector ELID.
264
+ *
265
+ * # JavaScript Example
266
+ *
267
+ * ```javascript
268
+ * import { encodeElidCompressed, getElidMetadata } from 'elid';
269
+ *
270
+ * const embedding = new Float64Array(768).fill(0.1);
271
+ * const elid = encodeElidCompressed(embedding, 0.5);
272
+ *
273
+ * const meta = getElidMetadata(elid);
274
+ * if (meta) {
275
+ * console.log(meta.originalDims); // 768
276
+ * console.log(meta.encodedDims); // depends on compression
277
+ * console.log(meta.isLossless); // false
278
+ * }
279
+ * ```
280
+ */
281
+ export function getElidMetadata(elid_str: string): any;
282
+ /**
283
+ * Decode an ELID string to raw bytes.
284
+ *
285
+ * Returns the raw byte representation of an ELID, including the header
286
+ * and payload bytes. Useful for custom processing or debugging.
287
+ *
288
+ * # Parameters
289
+ *
290
+ * - `elid_str`: A valid ELID string (base32hex encoded)
291
+ *
292
+ * # Returns
293
+ *
294
+ * A Uint8Array containing the raw bytes (header + payload).
295
+ *
296
+ * # JavaScript Example
297
+ *
298
+ * ```javascript
299
+ * import { decodeElid } from 'elid';
300
+ *
301
+ * const bytes = decodeElid("012345abcdef...");
302
+ * console.log(bytes); // Uint8Array [...]
303
+ * ```
304
+ */
305
+ export function decodeElid(elid_str: string): Uint8Array;
306
+ /**
307
+ * Compute the Levenshtein distance between two strings.
308
+ *
309
+ * Returns the minimum number of single-character edits needed to transform one string into another.
310
+ *
311
+ * # JavaScript Example
312
+ *
313
+ * ```javascript
314
+ * import { levenshtein } from 'elid';
315
+ *
316
+ * const distance = levenshtein("kitten", "sitting");
317
+ * console.log(distance); // 3
318
+ * ```
319
+ */
320
+ export function levenshtein(a: string, b: string): number;
321
+ /**
322
+ * Check if an ELID can be decoded back to an embedding.
323
+ *
324
+ * Returns true if the ELID was encoded with a FullVector profile
325
+ * (lossless, compressed, or max_length), false otherwise.
326
+ *
327
+ * # Parameters
328
+ *
329
+ * - `elid_str`: A valid ELID string (base32hex encoded)
330
+ *
331
+ * # Returns
332
+ *
333
+ * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
334
+ *
335
+ * # JavaScript Example
336
+ *
337
+ * ```javascript
338
+ * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
339
+ *
340
+ * const embedding = new Float64Array(768).fill(0.1);
341
+ *
342
+ * // Mini128 is NOT reversible
343
+ * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
344
+ * console.log(isElidReversible(mini128Elid)); // false
345
+ *
346
+ * // Lossless IS reversible
347
+ * const losslessElid = encodeElidLossless(embedding);
348
+ * console.log(isElidReversible(losslessElid)); // true
349
+ * ```
350
+ */
351
+ export function isElidReversible(elid_str: string): boolean;
160
352
  /**
161
353
  * Compute Levenshtein distance with custom options.
162
354
  *
@@ -175,20 +367,71 @@ export function normalizedLevenshtein(a: string, b: string): number;
175
367
  */
176
368
  export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
177
369
  /**
178
- * Compute the best matching similarity between two strings.
370
+ * Compute the Hamming distance between two ELID strings.
179
371
  *
180
- * Runs multiple algorithms and returns the highest score.
372
+ * Returns the number of differing bits between two Mini128 ELIDs.
373
+ * This distance is proportional to the angular distance between the
374
+ * original embeddings (lower = more similar).
375
+ *
376
+ * # Requirements
377
+ *
378
+ * Both ELIDs must use the Mini128 profile.
379
+ *
380
+ * # Parameters
381
+ *
382
+ * - `elid1`: First ELID string
383
+ * - `elid2`: Second ELID string
384
+ *
385
+ * # Returns
386
+ *
387
+ * Hamming distance (0-128). 0 means identical, 128 means completely different.
181
388
  *
182
389
  * # JavaScript Example
183
390
  *
184
391
  * ```javascript
185
- * import { bestMatch } from 'elid';
392
+ * import { encodeElid, elidHammingDistance, ElidProfile } from 'elid';
186
393
  *
187
- * const score = bestMatch("hello", "hallo");
188
- * console.log(score); // ~0.8
394
+ * const elid1 = encodeElid(embedding1, ElidProfile.Mini128);
395
+ * const elid2 = encodeElid(embedding2, ElidProfile.Mini128);
396
+ *
397
+ * const distance = elidHammingDistance(elid1, elid2);
398
+ * if (distance < 20) {
399
+ * console.log("Very similar embeddings!");
400
+ * }
189
401
  * ```
190
402
  */
191
- export function bestMatch(a: string, b: string): number;
403
+ export function elidHammingDistance(elid1: string, elid2: string): number;
404
+ /**
405
+ * Find the best match for a query string in an array of candidates.
406
+ *
407
+ * Returns an object with the index and similarity score of the best match.
408
+ *
409
+ * # JavaScript Example
410
+ *
411
+ * ```javascript
412
+ * import { findBestMatch } from 'elid';
413
+ *
414
+ * const candidates = ["apple", "application", "apply"];
415
+ * const result = findBestMatch("app", candidates);
416
+ * console.log(result); // { index: 0, score: 0.907 }
417
+ * ```
418
+ */
419
+ export function findBestMatch(query: string, candidates: string[]): object;
420
+ /**
421
+ * Compute the OSA (Optimal String Alignment) distance between two strings.
422
+ *
423
+ * Similar to Levenshtein but also considers transpositions as a single operation.
424
+ *
425
+ * # JavaScript Example
426
+ *
427
+ * ```javascript
428
+ * import { osaDistance } from 'elid';
429
+ *
430
+ * const distance = osaDistance("ca", "ac");
431
+ * console.log(distance); // 1 (transposition)
432
+ * ```
433
+ */
434
+ export function osaDistance(a: string, b: string): number;
192
435
  /**
193
436
  * Compute the Hamming distance between two SimHash values.
194
437
  *
@@ -208,37 +451,196 @@ export function bestMatch(a: string, b: string): number;
208
451
  */
209
452
  export function simhashDistance(hash1: number, hash2: number): number;
210
453
  /**
211
- * Compute the Jaro similarity between two strings.
454
+ * Encode an embedding with percentage-based compression.
455
+ *
456
+ * The retention percentage (0.0-1.0) controls how much information is preserved:
457
+ * - 1.0 = lossless (Full32 precision, all dimensions)
458
+ * - 0.5 = half precision and/or half dimensions
459
+ * - 0.25 = quarter precision and/or quarter dimensions
460
+ *
461
+ * The algorithm optimizes for dimension reduction first (which preserves
462
+ * more geometric relationships) before reducing precision.
463
+ *
464
+ * # Parameters
465
+ *
466
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
467
+ * - `retention_pct`: Information retention percentage (0.0-1.0)
468
+ *
469
+ * # Returns
470
+ *
471
+ * A base32hex-encoded ELID string.
472
+ *
473
+ * # JavaScript Example
474
+ *
475
+ * ```javascript
476
+ * import { encodeElidCompressed } from 'elid';
477
+ *
478
+ * const embedding = new Float64Array(768).fill(0.1);
479
+ *
480
+ * // 50% retention - good balance of size and fidelity
481
+ * const elid = encodeElidCompressed(embedding, 0.5);
482
+ *
483
+ * // 25% retention - smaller but less accurate
484
+ * const smallElid = encodeElidCompressed(embedding, 0.25);
485
+ * ```
486
+ */
487
+ export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
488
+ /**
489
+ * Encode an embedding with a maximum output string length constraint.
490
+ *
491
+ * Calculates the optimal precision and dimension settings to fit within
492
+ * the specified character limit while maximizing fidelity.
493
+ *
494
+ * # Parameters
495
+ *
496
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
497
+ * - `max_chars`: Maximum output string length in characters
498
+ *
499
+ * # Returns
500
+ *
501
+ * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
502
+ *
503
+ * # JavaScript Example
504
+ *
505
+ * ```javascript
506
+ * import { encodeElidMaxLength } from 'elid';
507
+ *
508
+ * const embedding = new Float64Array(768).fill(0.1);
509
+ *
510
+ * // Fit in 100 characters (e.g., for database column constraints)
511
+ * const elid = encodeElidMaxLength(embedding, 100);
512
+ * console.log(elid.length <= 100); // true
513
+ *
514
+ * // Fit in 50 characters (more compression)
515
+ * const shortElid = encodeElidMaxLength(embedding, 50);
516
+ * ```
517
+ */
518
+ export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
519
+ /**
520
+ * Compute the Jaro-Winkler similarity between two strings.
212
521
  *
213
522
  * Returns a value between 0.0 (completely different) and 1.0 (identical).
214
- * Particularly effective for short strings like names.
523
+ * Gives more favorable ratings to strings with common prefixes.
215
524
  *
216
525
  * # JavaScript Example
217
526
  *
218
527
  * ```javascript
219
- * import { jaro } from 'elid';
528
+ * import { jaroWinkler } from 'elid';
220
529
  *
221
- * const similarity = jaro("martha", "marhta");
222
- * console.log(similarity); // ~0.944
530
+ * const similarity = jaroWinkler("martha", "marhta");
531
+ * console.log(similarity); // ~0.961
223
532
  * ```
224
533
  */
225
- export function jaro(a: string, b: string): number;
534
+ export function jaroWinkler(a: string, b: string): number;
226
535
  /**
227
- * Find the best match for a query string in an array of candidates.
536
+ * Compute the Hamming distance between two strings.
228
537
  *
229
- * Returns an object with the index and similarity score of the best match.
538
+ * Returns the number of positions at which the characters differ.
539
+ * Returns undefined if strings have different lengths.
230
540
  *
231
541
  * # JavaScript Example
232
542
  *
233
543
  * ```javascript
234
- * import { findBestMatch } from 'elid';
544
+ * import { hamming } from 'elid';
235
545
  *
236
- * const candidates = ["apple", "application", "apply"];
237
- * const result = findBestMatch("app", candidates);
238
- * console.log(result); // { index: 0, score: 0.907 }
546
+ * const distance = hamming("karolin", "kathrin");
547
+ * console.log(distance); // 3
548
+ *
549
+ * const invalid = hamming("hello", "world!");
550
+ * console.log(invalid); // undefined
239
551
  * ```
240
552
  */
241
- export function findBestMatch(query: string, candidates: string[]): object;
553
+ export function hamming(a: string, b: string): number | undefined;
554
+ /**
555
+ * Dimension handling mode for full vector encoding.
556
+ *
557
+ * Controls whether to preserve original dimensions, reduce them,
558
+ * or project to a common space for cross-dimensional comparison.
559
+ *
560
+ * # JavaScript Example
561
+ *
562
+ * ```javascript
563
+ * import { ElidDimensionMode, encodeElidFullVector } from 'elid';
564
+ *
565
+ * // Preserve all dimensions
566
+ * // Reduce to fewer dimensions for smaller output
567
+ * // Common space for comparing different-sized embeddings
568
+ * ```
569
+ */
570
+ export enum ElidDimensionMode {
571
+ /**
572
+ * Preserve all original dimensions (no projection)
573
+ */
574
+ Preserve = 0,
575
+ /**
576
+ * Reduce dimensions using random projection
577
+ */
578
+ Reduce = 1,
579
+ /**
580
+ * Project to common space for cross-dimensional comparison
581
+ */
582
+ Common = 2,
583
+ }
584
+ /**
585
+ * ELID encoding profile for vector embeddings.
586
+ *
587
+ * Choose a profile based on your use case:
588
+ * - `Mini128`: Fast 128-bit SimHash, good for similarity via Hamming distance
589
+ * - `Morton10x10`: Z-order curve encoding, good for range queries
590
+ * - `Hilbert10x10`: Hilbert curve encoding, best locality preservation
591
+ *
592
+ * # JavaScript Example
593
+ *
594
+ * ```javascript
595
+ * import { ElidProfile, encodeElid } from 'elid';
596
+ *
597
+ * const embedding = new Float64Array(768).fill(0.1);
598
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
599
+ * ```
600
+ */
601
+ export enum ElidProfile {
602
+ /**
603
+ * 128-bit SimHash (cosine similarity via Hamming distance)
604
+ */
605
+ Mini128 = 0,
606
+ /**
607
+ * Morton/Z-order curve encoding (10 dims, 10 bits each)
608
+ */
609
+ Morton10x10 = 1,
610
+ /**
611
+ * Hilbert curve encoding (10 dims, 10 bits each)
612
+ */
613
+ Hilbert10x10 = 2,
614
+ }
615
+ /**
616
+ * Precision options for full vector encoding.
617
+ *
618
+ * Controls how many bits are used to represent each dimension value.
619
+ * Higher precision means more accurate reconstruction but larger output.
620
+ *
621
+ * # JavaScript Example
622
+ *
623
+ * ```javascript
624
+ * import { ElidVectorPrecision, encodeElidWithPrecision } from 'elid';
625
+ *
626
+ * const embedding = new Float64Array(768).fill(0.1);
627
+ * // Full32 = lossless, Half16 = smaller with minimal error
628
+ * ```
629
+ */
630
+ export enum ElidVectorPrecision {
631
+ /**
632
+ * Full 32-bit float (lossless, 4 bytes per dimension)
633
+ */
634
+ Full32 = 0,
635
+ /**
636
+ * 16-bit half-precision float (2 bytes per dimension)
637
+ */
638
+ Half16 = 1,
639
+ /**
640
+ * 8-bit quantized (1 byte per dimension, ~1% error)
641
+ */
642
+ Quant8 = 2,
643
+ }
242
644
  /**
243
645
  * Options for configuring string similarity algorithms
244
646
  */