elid 0.3.0 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (4)
  1. package/elid.d.ts +265 -265
  2. package/elid_bg.js +467 -467
  3. package/elid_bg.wasm +0 -0
  4. package/package.json +1 -1
package/elid.d.ts CHANGED
@@ -1,35 +1,28 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
3
  /**
4
- * Compute the OSA (Optimal String Alignment) distance between two strings.
4
+ * Compute the SimHash fingerprint of a string.
5
5
  *
6
- * Similar to Levenshtein but also considers transpositions as a single operation.
6
+ * Returns a 64-bit hash where similar strings produce similar numbers.
7
+ * Use this for database queries by storing the hash and querying by numeric range.
7
8
  *
8
9
  * # JavaScript Example
9
10
  *
10
11
  * ```javascript
11
- * import { osaDistance } from 'elid';
12
- *
13
- * const distance = osaDistance("ca", "ac");
14
- * console.log(distance); // 1 (transposition)
15
- * ```
16
- */
17
- export function osaDistance(a: string, b: string): number;
18
- /**
19
- * Compute the Levenshtein distance between two strings.
20
- *
21
- * Returns the minimum number of single-character edits needed to transform one string into another.
12
+ * import { simhash } from 'elid';
22
13
  *
23
- * # JavaScript Example
14
+ * const hash1 = simhash("iPhone 14");
15
+ * const hash2 = simhash("iPhone 15");
16
+ * const hash3 = simhash("Galaxy S23");
24
17
  *
25
- * ```javascript
26
- * import { levenshtein } from 'elid';
18
+ * // hash1 and hash2 will be numerically close
19
+ * // hash3 will be numerically distant
27
20
  *
28
- * const distance = levenshtein("kitten", "sitting");
29
- * console.log(distance); // 3
21
+ * // Store in database as bigint:
22
+ * // { name: "iPhone 14", simhash: hash1 }
30
23
  * ```
31
24
  */
32
- export function levenshtein(a: string, b: string): number;
25
+ export function simhash(text: string): number;
33
26
  /**
34
27
  * Find all matches above a threshold score.
35
28
  *
@@ -47,130 +40,186 @@ export function levenshtein(a: string, b: string): number;
47
40
  */
48
41
  export function findMatchesAboveThreshold(query: string, candidates: string[], threshold: number): any;
49
42
  /**
50
- * Compute the Hamming distance between two SimHash values.
43
+ * Encode an embedding for cross-dimensional comparison.
51
44
  *
52
- * Returns the number of differing bits. Lower values = higher similarity.
45
+ * Projects the embedding to a common dimension space, allowing comparison
46
+ * between embeddings of different original dimensions (e.g., 256d vs 768d).
47
+ *
48
+ * # Parameters
49
+ *
50
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
51
+ * - `common_dims`: Target dimension space (all vectors projected here)
52
+ *
53
+ * # Returns
54
+ *
55
+ * A base32hex-encoded ELID string.
53
56
  *
54
57
  * # JavaScript Example
55
58
  *
56
59
  * ```javascript
57
- * import { simhash, simhashDistance } from 'elid';
60
+ * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
58
61
  *
59
- * const hash1 = simhash("iPhone 14");
60
- * const hash2 = simhash("iPhone 15");
61
- * const distance = simhashDistance(hash1, hash2);
62
+ * // Different sized embeddings from different models
63
+ * const embedding256 = new Float64Array(256).fill(0.1);
64
+ * const embedding768 = new Float64Array(768).fill(0.1);
62
65
  *
63
- * console.log(distance); // Low number = similar
66
+ * // Project both to 128-dim common space
67
+ * const elid1 = encodeElidCrossDimensional(embedding256, 128);
68
+ * const elid2 = encodeElidCrossDimensional(embedding768, 128);
69
+ *
70
+ * // Now they can be compared directly (both decode to 128 dims)
71
+ * const dec1 = decodeElidToEmbedding(elid1);
72
+ * const dec2 = decodeElidToEmbedding(elid2);
73
+ * // Both have length 128
64
74
  * ```
65
75
  */
66
- export function simhashDistance(hash1: number, hash2: number): number;
76
+ export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
67
77
  /**
68
- * Encode an embedding with a maximum output string length constraint.
78
+ * Find all hashes within a given distance threshold.
69
79
  *
70
- * Calculates the optimal precision and dimension settings to fit within
71
- * the specified character limit while maximizing fidelity.
80
+ * Useful for database queries - pre-compute hashes, then find similar ones.
81
+ *
82
+ * # JavaScript Example
83
+ *
84
+ * ```javascript
85
+ * import { simhash, findSimilarHashes } from 'elid';
86
+ *
87
+ * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
88
+ * const hashes = candidates.map(s => simhash(s));
89
+ *
90
+ * const queryHash = simhash("iPhone 14");
91
+ * const matches = findSimilarHashes(queryHash, hashes, 10);
92
+ *
93
+ * console.log(matches); // [0, 1] - indices of similar items
94
+ * ```
95
+ */
96
+ export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
97
+ /**
98
+ * Compute the Jaro similarity between two strings.
99
+ *
100
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
101
+ * Particularly effective for short strings like names.
102
+ *
103
+ * # JavaScript Example
104
+ *
105
+ * ```javascript
106
+ * import { jaro } from 'elid';
107
+ *
108
+ * const similarity = jaro("martha", "marhta");
109
+ * console.log(similarity); // ~0.944
110
+ * ```
111
+ */
112
+ export function jaro(a: string, b: string): number;
113
+ /**
114
+ * Compute the best matching similarity between two strings.
115
+ *
116
+ * Runs multiple algorithms and returns the highest score.
117
+ *
118
+ * # JavaScript Example
119
+ *
120
+ * ```javascript
121
+ * import { bestMatch } from 'elid';
122
+ *
123
+ * const score = bestMatch("hello", "hallo");
124
+ * console.log(score); // ~0.8
125
+ * ```
126
+ */
127
+ export function bestMatch(a: string, b: string): number;
128
+ /**
129
+ * Encode an embedding vector to an ELID string.
130
+ *
131
+ * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
132
+ * sortable identifier. The ELID preserves locality properties for efficient
133
+ * similarity search.
72
134
  *
73
135
  * # Parameters
74
136
  *
75
137
  * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
76
- * - `max_chars`: Maximum output string length in characters
138
+ * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
77
139
  *
78
140
  * # Returns
79
141
  *
80
- * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
142
+ * A base32hex-encoded ELID string suitable for storage and comparison.
81
143
  *
82
144
  * # JavaScript Example
83
145
  *
84
146
  * ```javascript
85
- * import { encodeElidMaxLength } from 'elid';
86
- *
87
- * const embedding = new Float64Array(768).fill(0.1);
88
- *
89
- * // Fit in 100 characters (e.g., for database column constraints)
90
- * const elid = encodeElidMaxLength(embedding, 100);
91
- * console.log(elid.length <= 100); // true
147
+ * import { encodeElid, ElidProfile } from 'elid';
92
148
  *
93
- * // Fit in 50 characters (more compression)
94
- * const shortElid = encodeElidMaxLength(embedding, 50);
149
+ * // OpenAI embeddings are 1536 dimensions
150
+ * const embedding = await getEmbedding("Hello world");
151
+ * const elid = encodeElid(embedding, ElidProfile.Mini128);
152
+ * console.log(elid); // "012345abcdef..."
95
153
  * ```
96
154
  */
97
- export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
155
+ export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
98
156
  /**
99
- * Compute Levenshtein distance with custom options.
157
+ * Compute the normalized SimHash similarity between two strings.
158
+ *
159
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
100
160
  *
101
161
  * # JavaScript Example
102
162
  *
103
163
  * ```javascript
104
- * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
164
+ * import { simhashSimilarity } from 'elid';
105
165
  *
106
- * const opts = new SimilarityOptions();
107
- * opts.setCaseSensitive(false);
108
- * opts.setTrimWhitespace(true);
166
+ * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
167
+ * console.log(similarity); // ~0.9 (very similar)
109
168
  *
110
- * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
111
- * console.log(distance); // 0
169
+ * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
170
+ * console.log(similarity2); // ~0.4 (different)
112
171
  * ```
113
172
  */
114
- export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
173
+ export function simhashSimilarity(a: string, b: string): number;
115
174
  /**
116
- * Check if an ELID can be decoded back to an embedding.
175
+ * Encode an embedding using lossless full vector encoding.
117
176
  *
118
- * Returns true if the ELID was encoded with a FullVector profile
119
- * (lossless, compressed, or max_length), false otherwise.
177
+ * Preserves the exact embedding values (32-bit float precision) and all dimensions.
178
+ * This produces the largest output but allows exact reconstruction.
120
179
  *
121
180
  * # Parameters
122
181
  *
123
- * - `elid_str`: A valid ELID string (base32hex encoded)
182
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
124
183
  *
125
184
  * # Returns
126
185
  *
127
- * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
186
+ * A base32hex-encoded ELID string that can be decoded back to the original embedding.
128
187
  *
129
188
  * # JavaScript Example
130
189
  *
131
190
  * ```javascript
132
- * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
191
+ * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
133
192
  *
134
193
  * const embedding = new Float64Array(768).fill(0.1);
194
+ * const elid = encodeElidLossless(embedding);
135
195
  *
136
- * // Mini128 is NOT reversible
137
- * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
138
- * console.log(isElidReversible(mini128Elid)); // false
139
- *
140
- * // Lossless IS reversible
141
- * const losslessElid = encodeElidLossless(embedding);
142
- * console.log(isElidReversible(losslessElid)); // true
196
+ * // Later, recover the exact embedding
197
+ * const recovered = decodeElidToEmbedding(elid);
198
+ * // recovered is identical to embedding
143
199
  * ```
144
200
  */
145
- export function isElidReversible(elid_str: string): boolean;
201
+ export function encodeElidLossless(embedding: Float64Array): string;
146
202
  /**
147
- * Compute the SimHash fingerprint of a string.
203
+ * Compute the normalized Levenshtein similarity between two strings.
148
204
  *
149
- * Returns a 64-bit hash where similar strings produce similar numbers.
150
- * Use this for database queries by storing the hash and querying by numeric range.
205
+ * Returns a value between 0.0 (completely different) and 1.0 (identical).
151
206
  *
152
207
  * # JavaScript Example
153
208
  *
154
209
  * ```javascript
155
- * import { simhash } from 'elid';
156
- *
157
- * const hash1 = simhash("iPhone 14");
158
- * const hash2 = simhash("iPhone 15");
159
- * const hash3 = simhash("Galaxy S23");
160
- *
161
- * // hash1 and hash2 will be numerically close
162
- * // hash3 will be numerically distant
210
+ * import { normalizedLevenshtein } from 'elid';
163
211
  *
164
- * // Store in database as bigint:
165
- * // { name: "iPhone 14", simhash: hash1 }
212
+ * const similarity = normalizedLevenshtein("hello", "hallo");
213
+ * console.log(similarity); // ~0.8
166
214
  * ```
167
215
  */
168
- export function simhash(text: string): number;
216
+ export function normalizedLevenshtein(a: string, b: string): number;
169
217
  /**
170
- * Decode an ELID string to raw bytes.
218
+ * Decode an ELID string back to an embedding vector.
171
219
  *
172
- * Returns the raw byte representation of an ELID, including the header
173
- * and payload bytes. Useful for custom processing or debugging.
220
+ * Only works for ELIDs encoded with a FullVector profile (lossless,
221
+ * compressed, or max_length). Returns null for non-reversible profiles
222
+ * like Mini128, Morton, or Hilbert.
174
223
  *
175
224
  * # Parameters
176
225
  *
@@ -178,18 +227,27 @@ export function simhash(text: string): number;
178
227
  *
179
228
  * # Returns
180
229
  *
181
- * A Uint8Array containing the raw bytes (header + payload).
230
+ * A Float64Array containing the decoded embedding, or null if the ELID
231
+ * is not reversible.
232
+ *
233
+ * Note: If dimension reduction was used during encoding, the decoded
234
+ * embedding will be in the reduced dimension space, not the original.
182
235
  *
183
236
  * # JavaScript Example
184
237
  *
185
238
  * ```javascript
186
- * import { decodeElid } from 'elid';
239
+ * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
187
240
  *
188
- * const bytes = decodeElid("012345abcdef...");
189
- * console.log(bytes); // Uint8Array [...]
241
+ * const embedding = new Float64Array(768).fill(0.1);
242
+ * const elid = encodeElidLossless(embedding);
243
+ *
244
+ * if (isElidReversible(elid)) {
245
+ * const recovered = decodeElidToEmbedding(elid);
246
+ * console.log(recovered.length); // 768
247
+ * }
190
248
  * ```
191
249
  */
192
- export function decodeElid(elid_str: string): Uint8Array;
250
+ export function decodeElidToEmbedding(elid_str: string): any;
193
251
  /**
194
252
  * Get metadata about a FullVector ELID.
195
253
  *
@@ -222,40 +280,92 @@ export function decodeElid(elid_str: string): Uint8Array;
222
280
  */
223
281
  export function getElidMetadata(elid_str: string): any;
224
282
  /**
225
- * Encode an embedding for cross-dimensional comparison.
283
+ * Decode an ELID string to raw bytes.
226
284
  *
227
- * Projects the embedding to a common dimension space, allowing comparison
228
- * between embeddings of different original dimensions (e.g., 256d vs 768d).
285
+ * Returns the raw byte representation of an ELID, including the header
286
+ * and payload bytes. Useful for custom processing or debugging.
287
+ *
288
+ * # Parameters
289
+ *
290
+ * - `elid_str`: A valid ELID string (base32hex encoded)
291
+ *
292
+ * # Returns
293
+ *
294
+ * A Uint8Array containing the raw bytes (header + payload).
295
+ *
296
+ * # JavaScript Example
297
+ *
298
+ * ```javascript
299
+ * import { decodeElid } from 'elid';
300
+ *
301
+ * const bytes = decodeElid("012345abcdef...");
302
+ * console.log(bytes); // Uint8Array [...]
303
+ * ```
304
+ */
305
+ export function decodeElid(elid_str: string): Uint8Array;
306
+ /**
307
+ * Compute the Levenshtein distance between two strings.
308
+ *
309
+ * Returns the minimum number of single-character edits needed to transform one string into another.
310
+ *
311
+ * # JavaScript Example
312
+ *
313
+ * ```javascript
314
+ * import { levenshtein } from 'elid';
315
+ *
316
+ * const distance = levenshtein("kitten", "sitting");
317
+ * console.log(distance); // 3
318
+ * ```
319
+ */
320
+ export function levenshtein(a: string, b: string): number;
321
+ /**
322
+ * Check if an ELID can be decoded back to an embedding.
323
+ *
324
+ * Returns true if the ELID was encoded with a FullVector profile
325
+ * (lossless, compressed, or max_length), false otherwise.
229
326
  *
230
327
  * # Parameters
231
328
  *
232
- * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
233
- * - `common_dims`: Target dimension space (all vectors projected here)
329
+ * - `elid_str`: A valid ELID string (base32hex encoded)
234
330
  *
235
331
  * # Returns
236
332
  *
237
- * A base32hex-encoded ELID string.
333
+ * `true` if decodeElidToEmbedding will return an embedding, `false` otherwise.
238
334
  *
239
335
  * # JavaScript Example
240
336
  *
241
337
  * ```javascript
242
- * import { encodeElidCrossDimensional, decodeElidToEmbedding } from 'elid';
338
+ * import { encodeElid, encodeElidLossless, isElidReversible, ElidProfile } from 'elid';
243
339
  *
244
- * // Different sized embeddings from different models
245
- * const embedding256 = new Float64Array(256).fill(0.1);
246
- * const embedding768 = new Float64Array(768).fill(0.1);
340
+ * const embedding = new Float64Array(768).fill(0.1);
247
341
  *
248
- * // Project both to 128-dim common space
249
- * const elid1 = encodeElidCrossDimensional(embedding256, 128);
250
- * const elid2 = encodeElidCrossDimensional(embedding768, 128);
342
+ * // Mini128 is NOT reversible
343
+ * const mini128Elid = encodeElid(embedding, ElidProfile.Mini128);
344
+ * console.log(isElidReversible(mini128Elid)); // false
251
345
  *
252
- * // Now they can be compared directly (both decode to 128 dims)
253
- * const dec1 = decodeElidToEmbedding(elid1);
254
- * const dec2 = decodeElidToEmbedding(elid2);
255
- * // Both have length 128
346
+ * // Lossless IS reversible
347
+ * const losslessElid = encodeElidLossless(embedding);
348
+ * console.log(isElidReversible(losslessElid)); // true
256
349
  * ```
257
350
  */
258
- export function encodeElidCrossDimensional(embedding: Float64Array, common_dims: number): string;
351
+ export function isElidReversible(elid_str: string): boolean;
352
+ /**
353
+ * Compute Levenshtein distance with custom options.
354
+ *
355
+ * # JavaScript Example
356
+ *
357
+ * ```javascript
358
+ * import { levenshteinWithOpts, SimilarityOptions } from 'elid';
359
+ *
360
+ * const opts = new SimilarityOptions();
361
+ * opts.setCaseSensitive(false);
362
+ * opts.setTrimWhitespace(true);
363
+ *
364
+ * const distance = levenshteinWithOpts(" HELLO ", "hello", opts);
365
+ * console.log(distance); // 0
366
+ * ```
367
+ */
368
+ export function levenshteinWithOpts(a: string, b: string, opts: SimilarityOptions): number;
259
369
  /**
260
370
  * Compute the Hamming distance between two ELID strings.
261
371
  *
@@ -308,164 +418,120 @@ export function elidHammingDistance(elid1: string, elid2: string): number;
308
418
  */
309
419
  export function findBestMatch(query: string, candidates: string[]): object;
310
420
  /**
311
- * Encode an embedding with percentage-based compression.
312
- *
313
- * The retention percentage (0.0-1.0) controls how much information is preserved:
314
- * - 1.0 = lossless (Full32 precision, all dimensions)
315
- * - 0.5 = half precision and/or half dimensions
316
- * - 0.25 = quarter precision and/or quarter dimensions
317
- *
318
- * The algorithm optimizes for dimension reduction first (which preserves
319
- * more geometric relationships) before reducing precision.
320
- *
321
- * # Parameters
322
- *
323
- * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
324
- * - `retention_pct`: Information retention percentage (0.0-1.0)
325
- *
326
- * # Returns
421
+ * Compute the OSA (Optimal String Alignment) distance between two strings.
327
422
  *
328
- * A base32hex-encoded ELID string.
423
+ * Similar to Levenshtein but also considers transpositions as a single operation.
329
424
  *
330
425
  * # JavaScript Example
331
426
  *
332
427
  * ```javascript
333
- * import { encodeElidCompressed } from 'elid';
334
- *
335
- * const embedding = new Float64Array(768).fill(0.1);
336
- *
337
- * // 50% retention - good balance of size and fidelity
338
- * const elid = encodeElidCompressed(embedding, 0.5);
428
+ * import { osaDistance } from 'elid';
339
429
  *
340
- * // 25% retention - smaller but less accurate
341
- * const smallElid = encodeElidCompressed(embedding, 0.25);
430
+ * const distance = osaDistance("ca", "ac");
431
+ * console.log(distance); // 1 (transposition)
342
432
  * ```
343
433
  */
344
- export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
434
+ export function osaDistance(a: string, b: string): number;
345
435
  /**
346
- * Compute the Jaro similarity between two strings.
436
+ * Compute the Hamming distance between two SimHash values.
347
437
  *
348
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
349
- * Particularly effective for short strings like names.
438
+ * Returns the number of differing bits. Lower values = higher similarity.
350
439
  *
351
440
  * # JavaScript Example
352
441
  *
353
442
  * ```javascript
354
- * import { jaro } from 'elid';
443
+ * import { simhash, simhashDistance } from 'elid';
355
444
  *
356
- * const similarity = jaro("martha", "marhta");
357
- * console.log(similarity); // ~0.944
445
+ * const hash1 = simhash("iPhone 14");
446
+ * const hash2 = simhash("iPhone 15");
447
+ * const distance = simhashDistance(hash1, hash2);
448
+ *
449
+ * console.log(distance); // Low number = similar
358
450
  * ```
359
451
  */
360
- export function jaro(a: string, b: string): number;
452
+ export function simhashDistance(hash1: number, hash2: number): number;
361
453
  /**
362
- * Encode an embedding using lossless full vector encoding.
454
+ * Encode an embedding with percentage-based compression.
363
455
  *
364
- * Preserves the exact embedding values (32-bit float precision) and all dimensions.
365
- * This produces the largest output but allows exact reconstruction.
456
+ * The retention percentage (0.0-1.0) controls how much information is preserved:
457
+ * - 1.0 = lossless (Full32 precision, all dimensions)
458
+ * - 0.5 = half precision and/or half dimensions
459
+ * - 0.25 = quarter precision and/or quarter dimensions
460
+ *
461
+ * The algorithm optimizes for dimension reduction first (which preserves
462
+ * more geometric relationships) before reducing precision.
366
463
  *
367
464
  * # Parameters
368
465
  *
369
466
  * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
467
+ * - `retention_pct`: Information retention percentage (0.0-1.0)
370
468
  *
371
469
  * # Returns
372
470
  *
373
- * A base32hex-encoded ELID string that can be decoded back to the original embedding.
471
+ * A base32hex-encoded ELID string.
374
472
  *
375
473
  * # JavaScript Example
376
474
  *
377
475
  * ```javascript
378
- * import { encodeElidLossless, decodeElidToEmbedding } from 'elid';
476
+ * import { encodeElidCompressed } from 'elid';
379
477
  *
380
478
  * const embedding = new Float64Array(768).fill(0.1);
381
- * const elid = encodeElidLossless(embedding);
382
479
  *
383
- * // Later, recover the exact embedding
384
- * const recovered = decodeElidToEmbedding(elid);
385
- * // recovered is identical to embedding
480
+ * // 50% retention - good balance of size and fidelity
481
+ * const elid = encodeElidCompressed(embedding, 0.5);
482
+ *
483
+ * // 25% retention - smaller but less accurate
484
+ * const smallElid = encodeElidCompressed(embedding, 0.25);
386
485
  * ```
387
486
  */
388
- export function encodeElidLossless(embedding: Float64Array): string;
487
+ export function encodeElidCompressed(embedding: Float64Array, retention_pct: number): string;
389
488
  /**
390
- * Decode an ELID string back to an embedding vector.
489
+ * Encode an embedding with a maximum output string length constraint.
391
490
  *
392
- * Only works for ELIDs encoded with a FullVector profile (lossless,
393
- * compressed, or max_length). Returns null for non-reversible profiles
394
- * like Mini128, Morton, or Hilbert.
491
+ * Calculates the optimal precision and dimension settings to fit within
492
+ * the specified character limit while maximizing fidelity.
395
493
  *
396
494
  * # Parameters
397
495
  *
398
- * - `elid_str`: A valid ELID string (base32hex encoded)
496
+ * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
497
+ * - `max_chars`: Maximum output string length in characters
399
498
  *
400
499
  * # Returns
401
500
  *
402
- * A Float64Array containing the decoded embedding, or null if the ELID
403
- * is not reversible.
404
- *
405
- * Note: If dimension reduction was used during encoding, the decoded
406
- * embedding will be in the reduced dimension space, not the original.
501
+ * A base32hex-encoded ELID string guaranteed to be <= max_chars in length.
407
502
  *
408
503
  * # JavaScript Example
409
504
  *
410
505
  * ```javascript
411
- * import { encodeElidLossless, decodeElidToEmbedding, isElidReversible } from 'elid';
506
+ * import { encodeElidMaxLength } from 'elid';
412
507
  *
413
508
  * const embedding = new Float64Array(768).fill(0.1);
414
- * const elid = encodeElidLossless(embedding);
415
- *
416
- * if (isElidReversible(elid)) {
417
- * const recovered = decodeElidToEmbedding(elid);
418
- * console.log(recovered.length); // 768
419
- * }
420
- * ```
421
- */
422
- export function decodeElidToEmbedding(elid_str: string): any;
423
- /**
424
- * Encode an embedding vector to an ELID string.
425
- *
426
- * Converts a high-dimensional embedding (64-2048 dimensions) into a compact,
427
- * sortable identifier. The ELID preserves locality properties for efficient
428
- * similarity search.
429
- *
430
- * # Parameters
431
- *
432
- * - `embedding`: Float64 array of embedding values (64-2048 dimensions)
433
- * - `profile`: Encoding profile (Mini128, Morton10x10, or Hilbert10x10)
434
- *
435
- * # Returns
436
- *
437
- * A base32hex-encoded ELID string suitable for storage and comparison.
438
- *
439
- * # JavaScript Example
440
509
  *
441
- * ```javascript
442
- * import { encodeElid, ElidProfile } from 'elid';
510
+ * // Fit in 100 characters (e.g., for database column constraints)
511
+ * const elid = encodeElidMaxLength(embedding, 100);
512
+ * console.log(elid.length <= 100); // true
443
513
  *
444
- * // OpenAI embeddings are 1536 dimensions
445
- * const embedding = await getEmbedding("Hello world");
446
- * const elid = encodeElid(embedding, ElidProfile.Mini128);
447
- * console.log(elid); // "012345abcdef..."
514
+ * // Fit in 50 characters (more compression)
515
+ * const shortElid = encodeElidMaxLength(embedding, 50);
448
516
  * ```
449
517
  */
450
- export function encodeElid(embedding: Float64Array, profile: ElidProfile): string;
518
+ export function encodeElidMaxLength(embedding: Float64Array, max_chars: number): string;
451
519
  /**
452
- * Compute the normalized SimHash similarity between two strings.
520
+ * Compute the Jaro-Winkler similarity between two strings.
453
521
  *
454
522
  * Returns a value between 0.0 (completely different) and 1.0 (identical).
523
+ * Gives more favorable ratings to strings with common prefixes.
455
524
  *
456
525
  * # JavaScript Example
457
526
  *
458
527
  * ```javascript
459
- * import { simhashSimilarity } from 'elid';
460
- *
461
- * const similarity = simhashSimilarity("iPhone 14", "iPhone 15");
462
- * console.log(similarity); // ~0.9 (very similar)
528
+ * import { jaroWinkler } from 'elid';
463
529
  *
464
- * const similarity2 = simhashSimilarity("iPhone", "Galaxy");
465
- * console.log(similarity2); // ~0.4 (different)
530
+ * const similarity = jaroWinkler("martha", "marhta");
531
+ * console.log(similarity); // ~0.961
466
532
  * ```
467
533
  */
468
- export function simhashSimilarity(a: string, b: string): number;
534
+ export function jaroWinkler(a: string, b: string): number;
469
535
  /**
470
536
  * Compute the Hamming distance between two strings.
471
537
  *
@@ -485,72 +551,6 @@ export function simhashSimilarity(a: string, b: string): number;
485
551
  * ```
486
552
  */
487
553
  export function hamming(a: string, b: string): number | undefined;
488
- /**
489
- * Compute the best matching similarity between two strings.
490
- *
491
- * Runs multiple algorithms and returns the highest score.
492
- *
493
- * # JavaScript Example
494
- *
495
- * ```javascript
496
- * import { bestMatch } from 'elid';
497
- *
498
- * const score = bestMatch("hello", "hallo");
499
- * console.log(score); // ~0.8
500
- * ```
501
- */
502
- export function bestMatch(a: string, b: string): number;
503
- /**
504
- * Compute the Jaro-Winkler similarity between two strings.
505
- *
506
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
507
- * Gives more favorable ratings to strings with common prefixes.
508
- *
509
- * # JavaScript Example
510
- *
511
- * ```javascript
512
- * import { jaroWinkler } from 'elid';
513
- *
514
- * const similarity = jaroWinkler("martha", "marhta");
515
- * console.log(similarity); // ~0.961
516
- * ```
517
- */
518
- export function jaroWinkler(a: string, b: string): number;
519
- /**
520
- * Compute the normalized Levenshtein similarity between two strings.
521
- *
522
- * Returns a value between 0.0 (completely different) and 1.0 (identical).
523
- *
524
- * # JavaScript Example
525
- *
526
- * ```javascript
527
- * import { normalizedLevenshtein } from 'elid';
528
- *
529
- * const similarity = normalizedLevenshtein("hello", "hallo");
530
- * console.log(similarity); // ~0.8
531
- * ```
532
- */
533
- export function normalizedLevenshtein(a: string, b: string): number;
534
- /**
535
- * Find all hashes within a given distance threshold.
536
- *
537
- * Useful for database queries - pre-compute hashes, then find similar ones.
538
- *
539
- * # JavaScript Example
540
- *
541
- * ```javascript
542
- * import { simhash, findSimilarHashes } from 'elid';
543
- *
544
- * const candidates = ["iPhone 14 Pro", "iPhone 13", "Galaxy S23"];
545
- * const hashes = candidates.map(s => simhash(s));
546
- *
547
- * const queryHash = simhash("iPhone 14");
548
- * const matches = findSimilarHashes(queryHash, hashes, 10);
549
- *
550
- * console.log(matches); // [0, 1] - indices of similar items
551
- * ```
552
- */
553
- export function findSimilarHashes(query_hash: number, candidate_hashes: Float64Array, max_distance: number): Uint32Array;
554
554
  /**
555
555
  * Dimension handling mode for full vector encoding.
556
556
  *