@ai4data/search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -0
- package/dist/index.d.mts +693 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/dist/rank-worker.mjs +2910 -0
- package/dist/rank-worker.mjs.map +1 -0
- package/dist/worker.mjs +1866 -0
- package/dist/worker.mjs.map +1 -0
- package/package.json +59 -0
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,693 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the collection manifest format produced by the Python pipeline.
|
|
3
|
+
* The manifest.json file is the browser's entry point to a search collection.
|
|
4
|
+
*/
|
|
5
|
+
interface FlatIndexConfig {
|
|
6
|
+
/** Relative path to the flat brute-force index, e.g. "flat/embeddings.int8.json" */
|
|
7
|
+
path: string;
|
|
8
|
+
}
|
|
9
|
+
interface HNSWIndexConfig {
|
|
10
|
+
/** Directory prefix for all HNSW index files, e.g. "index/" */
|
|
11
|
+
path: string;
|
|
12
|
+
/** Relative path to index/config.json */
|
|
13
|
+
config: string;
|
|
14
|
+
/** Relative path to index/upper_layers.json */
|
|
15
|
+
upper_layers?: string;
|
|
16
|
+
/** Relative path to index/node_to_shard.json */
|
|
17
|
+
node_to_shard?: string;
|
|
18
|
+
/** Relative path to index/titles.json (display metadata, no vectors) */
|
|
19
|
+
titles?: string;
|
|
20
|
+
/** Relative path to index/cluster_centroids.json */
|
|
21
|
+
cluster_centroids?: string;
|
|
22
|
+
/** Relative path to index/bm25_corpus.json (lightweight text-only corpus for BM25) */
|
|
23
|
+
bm25_corpus?: string;
|
|
24
|
+
}
|
|
25
|
+
type SearchMode$1 = 'flat' | 'hnsw';
|
|
26
|
+
interface ManifestThresholds {
|
|
27
|
+
/** Maximum n_items for flat (brute-force) mode; above this HNSW is used */
|
|
28
|
+
flat_max: number;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Top-level manifest written by 03_build_index.py.
|
|
32
|
+
* The browser worker fetches this URL first to determine the search mode.
|
|
33
|
+
*/
|
|
34
|
+
interface CollectionManifest {
|
|
35
|
+
version?: string;
|
|
36
|
+
collection_id: string;
|
|
37
|
+
n_items: number;
|
|
38
|
+
embedding_dim: number;
|
|
39
|
+
matryoshka_dim?: number | null;
|
|
40
|
+
quant?: string;
|
|
41
|
+
model_id: string;
|
|
42
|
+
search_mode: SearchMode$1;
|
|
43
|
+
/** Whether index files are gzip-compressed (.json.gz). Static hosts like GitHub Pages require false. */
|
|
44
|
+
compressed: boolean;
|
|
45
|
+
flat?: FlatIndexConfig;
|
|
46
|
+
index?: HNSWIndexConfig;
|
|
47
|
+
thresholds?: ManifestThresholds;
|
|
48
|
+
/** Fields included in result metadata (e.g. ["idno","title","abstract","type","doi"]) */
|
|
49
|
+
preview_fields?: string[];
|
|
50
|
+
/** Fields used for BM25 lexical search */
|
|
51
|
+
bm25_fields?: string[];
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Parsed HNSW config.json contents, loaded by HNSWEngine at init.
|
|
55
|
+
*/
|
|
56
|
+
interface HNSWConfig {
|
|
57
|
+
n_items: number;
|
|
58
|
+
dim: number;
|
|
59
|
+
matryoshka_dim: number | null;
|
|
60
|
+
quant: string;
|
|
61
|
+
hnsw_M: number;
|
|
62
|
+
hnsw_ef_construction: number;
|
|
63
|
+
n_layers: number;
|
|
64
|
+
n_clusters: number;
|
|
65
|
+
entry_node_id: number;
|
|
66
|
+
entry_layer: number;
|
|
67
|
+
recall_at_10: number;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* A single entry in index/bm25_corpus.json.
|
|
71
|
+
* Lightweight text corpus written by the pipeline for BM25 indexing.
|
|
72
|
+
*/
|
|
73
|
+
interface BM25CorpusEntry {
|
|
74
|
+
id: string | number;
|
|
75
|
+
title: string;
|
|
76
|
+
text: string;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Core search types: result shapes, options, and the shared engine interface.
|
|
81
|
+
*/
|
|
82
|
+
interface SearchResult {
|
|
83
|
+
/** Integer node ID (HNSW insertion order) or string document ID (flat mode) */
|
|
84
|
+
id: number | string;
|
|
85
|
+
/** Document identifier from the source data (e.g. "WPS9999") */
|
|
86
|
+
idno?: string;
|
|
87
|
+
/** Cosine similarity (semantic) or combined hybrid score in [0, 1] */
|
|
88
|
+
score: number;
|
|
89
|
+
title: string;
|
|
90
|
+
/** Abstract / body text (present in flat mode; absent in HNSW-only results) */
|
|
91
|
+
text?: string;
|
|
92
|
+
abstract?: string;
|
|
93
|
+
type?: string;
|
|
94
|
+
type_extra?: string;
|
|
95
|
+
sub_title?: string;
|
|
96
|
+
doi?: string;
|
|
97
|
+
url?: string;
|
|
98
|
+
geographic_coverage?: GeographicCoverage[];
|
|
99
|
+
time_coverage?: string;
|
|
100
|
+
source?: string[];
|
|
101
|
+
/** Normalized semantic contribution (0–1) in hybrid mode */
|
|
102
|
+
semanticScore?: number;
|
|
103
|
+
/** Normalized BM25 contribution (0–1) in hybrid mode */
|
|
104
|
+
lexicalScore?: number;
|
|
105
|
+
/** Score from cross-encoder reranker (higher = more relevant) */
|
|
106
|
+
rerank_score?: number;
|
|
107
|
+
/** Allow arbitrary additional preview fields from the pipeline */
|
|
108
|
+
[key: string]: unknown;
|
|
109
|
+
}
|
|
110
|
+
type GeographicCoverage = string | {
|
|
111
|
+
title?: string;
|
|
112
|
+
name?: string;
|
|
113
|
+
type?: string;
|
|
114
|
+
[key: string]: unknown;
|
|
115
|
+
};
|
|
116
|
+
interface SearchStats {
|
|
117
|
+
/** Wall-clock milliseconds for the entire search() call */
|
|
118
|
+
latencyMs: number;
|
|
119
|
+
/** New shard files fetched during this query (0 = fully cached) */
|
|
120
|
+
shardsLoaded: number;
|
|
121
|
+
/** Total shards currently held in the worker's in-memory Map */
|
|
122
|
+
totalCachedShards: number;
|
|
123
|
+
}
|
|
124
|
+
interface SearchOptions {
|
|
125
|
+
topK?: number;
|
|
126
|
+
/** HNSW beam width at layer 0 (higher = better recall, more shard fetches) */
|
|
127
|
+
ef?: number;
|
|
128
|
+
/** HNSW beam width for upper-layer descent */
|
|
129
|
+
ef_upper?: number;
|
|
130
|
+
/** Minimum cosine similarity threshold (flat mode) */
|
|
131
|
+
threshold?: number;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Common interface implemented by both FlatEngine and HNSWEngine.
|
|
135
|
+
* Allows search.worker.ts to operate on either engine without type narrowing.
|
|
136
|
+
*/
|
|
137
|
+
interface SearchEngine {
|
|
138
|
+
readonly ready: boolean;
|
|
139
|
+
search(queryVec: Float32Array, opts?: SearchOptions): Promise<SearchResult[]> | SearchResult[];
|
|
140
|
+
lastStats: SearchStats | null;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* A single item in the flat index (flat/embeddings.int8.json).
|
|
144
|
+
* Contains the int8 quantized vector plus all preview fields.
|
|
145
|
+
*/
|
|
146
|
+
interface FlatItem {
|
|
147
|
+
id: string | number;
|
|
148
|
+
idno?: string;
|
|
149
|
+
title: string;
|
|
150
|
+
text: string;
|
|
151
|
+
scale: number;
|
|
152
|
+
/** Stored as plain number[] in the JSON; converted to Int8Array on load */
|
|
153
|
+
qv: number[] | Int8Array;
|
|
154
|
+
type?: string;
|
|
155
|
+
[key: string]: unknown;
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* A node in a layer-0 shard file.
|
|
159
|
+
*/
|
|
160
|
+
interface ShardNode {
|
|
161
|
+
id: number;
|
|
162
|
+
scale: number;
|
|
163
|
+
qv: number[];
|
|
164
|
+
neighbors: number[];
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Contents of index/layer0/shard_NNN.json.
|
|
168
|
+
*/
|
|
169
|
+
interface Shard {
|
|
170
|
+
shard_id: number;
|
|
171
|
+
nodes: ShardNode[];
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* A minimal interface for wink-bm25-text-search, enough to type HybridSearch
|
|
175
|
+
* without a full declaration file for the library.
|
|
176
|
+
*/
|
|
177
|
+
interface BM25Engine {
|
|
178
|
+
/** Returns [[docIdx, score], ...] sorted by score descending */
|
|
179
|
+
search(query: string, topK: number): [number, number][];
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/**
|
|
183
|
+
* Discriminated union types for the search worker message protocol.
|
|
184
|
+
*
|
|
185
|
+
* Using discriminated unions lets TypeScript narrow the message type in each
|
|
186
|
+
* `case` branch of the message handler, eliminating unsafe `as` casts and
|
|
187
|
+
* ensuring inbound/outbound messages stay in sync.
|
|
188
|
+
*
|
|
189
|
+
* Usage in main thread:
|
|
190
|
+
* worker.postMessage({ type: 'init', manifestUrl: '...' } satisfies WorkerInboundMessage)
|
|
191
|
+
*
|
|
192
|
+
* Usage in worker:
|
|
193
|
+
* self.postMessage({ type: 'ready', mode: 'hnsw', config: manifest } satisfies WorkerOutboundMessage)
|
|
194
|
+
*/
|
|
195
|
+
|
|
196
|
+
type WorkerInboundMessage = WorkerInitMessage | WorkerSearchMessage | WorkerEmbedMessage | WorkerPingMessage | WorkerGetRecentMessage | WorkerSearchCompareMessage;
|
|
197
|
+
interface WorkerInitMessage {
|
|
198
|
+
type: 'init';
|
|
199
|
+
/** Must be an absolute URL — resolve with new URL(url, location.href).href before posting */
|
|
200
|
+
manifestUrl: string;
|
|
201
|
+
/** HuggingFace model ID, defaults to avsolatorio/GIST-small-Embedding-v0 */
|
|
202
|
+
modelId?: string;
|
|
203
|
+
/** If true, skip loading the embedding model (for testing BM25 fallback). Index + BM25 still load. */
|
|
204
|
+
skipModelLoad?: boolean;
|
|
205
|
+
/** Delay (seconds) before starting to load the embedding model; index + BM25 load first (for testing). */
|
|
206
|
+
modelLoadDelaySeconds?: number;
|
|
207
|
+
}
|
|
208
|
+
interface WorkerSearchMessage {
|
|
209
|
+
type: 'search';
|
|
210
|
+
text: string;
|
|
211
|
+
topK?: number;
|
|
212
|
+
ef?: number;
|
|
213
|
+
ef_upper?: number;
|
|
214
|
+
threshold?: number;
|
|
215
|
+
mode?: 'semantic' | 'lexical' | 'hybrid';
|
|
216
|
+
}
|
|
217
|
+
interface WorkerEmbedMessage {
|
|
218
|
+
type: 'embed';
|
|
219
|
+
text: string;
|
|
220
|
+
}
|
|
221
|
+
interface WorkerPingMessage {
|
|
222
|
+
type: 'ping';
|
|
223
|
+
}
|
|
224
|
+
interface WorkerGetRecentMessage {
|
|
225
|
+
type: 'getRecent';
|
|
226
|
+
limit?: number;
|
|
227
|
+
}
|
|
228
|
+
interface WorkerSearchCompareMessage {
|
|
229
|
+
type: 'searchCompare';
|
|
230
|
+
text: string;
|
|
231
|
+
topK?: number;
|
|
232
|
+
ef?: number;
|
|
233
|
+
ef_upper?: number;
|
|
234
|
+
}
|
|
235
|
+
type WorkerOutboundMessage = WorkerProgressMessage | WorkerIndexReadyMessage | WorkerReadyMessage | WorkerResultsMessage | WorkerEmbeddingMessage | WorkerPongMessage | WorkerLoadingMessage | WorkerRecentMessage | WorkerCompareMessage | WorkerErrorMessage;
|
|
236
|
+
interface WorkerProgressMessage {
|
|
237
|
+
type: 'progress';
|
|
238
|
+
phase: 'model' | 'index';
|
|
239
|
+
message: string;
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Sent when the index files + BM25 corpus are loaded.
|
|
243
|
+
* Lexical search is available from this point on, even if the embedding model
|
|
244
|
+
* is still downloading (BM25 fallback).
|
|
245
|
+
*/
|
|
246
|
+
interface WorkerIndexReadyMessage {
|
|
247
|
+
type: 'index_ready';
|
|
248
|
+
bm25Ready: boolean;
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Sent when the index is ready; if the embedding model was loaded, semantic/hybrid are available.
|
|
252
|
+
* When skipModelLoad was used, modelLoaded is false and semantic/hybrid requests fall back to lexical (BM25) search.
|
|
253
|
+
*/
|
|
254
|
+
interface WorkerReadyMessage {
|
|
255
|
+
type: 'ready';
|
|
256
|
+
mode: 'flat' | 'hnsw';
|
|
257
|
+
config: CollectionManifest;
|
|
258
|
+
/** false when init was called with skipModelLoad (embedding model not loaded). */
|
|
259
|
+
modelLoaded?: boolean;
|
|
260
|
+
}
|
|
261
|
+
interface WorkerResultsMessage {
|
|
262
|
+
type: 'results';
|
|
263
|
+
data: SearchResult[];
|
|
264
|
+
stats?: SearchStats | null;
|
|
265
|
+
/**
|
|
266
|
+
* true when the result was produced via BM25 fallback because the embedding
|
|
267
|
+
* model was not yet ready (requested mode was semantic or hybrid).
|
|
268
|
+
*/
|
|
269
|
+
fallback?: boolean;
|
|
270
|
+
}
|
|
271
|
+
interface WorkerEmbeddingMessage {
|
|
272
|
+
type: 'embedding';
|
|
273
|
+
/** Embedding vector; its underlying ArrayBuffer is transferred (zero-copy) rather than cloned */
|
|
274
|
+
data: Float32Array;
|
|
275
|
+
}
|
|
276
|
+
interface WorkerPongMessage {
|
|
277
|
+
type: 'pong';
|
|
278
|
+
}
|
|
279
|
+
interface WorkerLoadingMessage {
|
|
280
|
+
type: 'loading';
|
|
281
|
+
}
|
|
282
|
+
interface WorkerRecentMessage {
|
|
283
|
+
type: 'recent';
|
|
284
|
+
data: SearchResult[];
|
|
285
|
+
}
|
|
286
|
+
interface WorkerCompareMessage {
|
|
287
|
+
type: 'compare';
|
|
288
|
+
hnsw: SearchResult[];
|
|
289
|
+
flat: SearchResult[];
|
|
290
|
+
recall: number;
|
|
291
|
+
overlap: number;
|
|
292
|
+
k: number;
|
|
293
|
+
}
|
|
294
|
+
interface WorkerErrorMessage {
|
|
295
|
+
type: 'error';
|
|
296
|
+
message: string;
|
|
297
|
+
originalType?: string;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* SearchClient — framework-agnostic wrapper around the search Web Worker.
|
|
302
|
+
*
|
|
303
|
+
* Works in any JavaScript environment that supports Web Workers.
|
|
304
|
+
* Extend or wrap with framework adapters (see adapters/vue.ts, adapters/react.ts).
|
|
305
|
+
*
|
|
306
|
+
* @example
|
|
307
|
+
* ```ts
|
|
308
|
+
* const client = new SearchClient('https://example.com/data/prwp/manifest.json')
|
|
309
|
+
*
|
|
310
|
+
* client.on('index_ready', () => {
|
|
311
|
+
* client.search('climate finance', { topK: 10, mode: 'hybrid' })
|
|
312
|
+
* })
|
|
313
|
+
*
|
|
314
|
+
* client.on('results', ({ data, stats }) => {
|
|
315
|
+
* console.log(data) // SearchResult[]
|
|
316
|
+
* console.log(stats) // SearchStats | null
|
|
317
|
+
* })
|
|
318
|
+
*
|
|
319
|
+
* // Clean up when done
|
|
320
|
+
* client.destroy()
|
|
321
|
+
* ```
|
|
322
|
+
*/
|
|
323
|
+
|
|
324
|
+
type SearchMode = 'semantic' | 'lexical' | 'hybrid';
|
|
325
|
+
interface SearchClientOptions {
|
|
326
|
+
/** HuggingFace model ID to use for embeddings (default: avsolatorio/GIST-small-Embedding-v0) */
|
|
327
|
+
modelId?: string;
|
|
328
|
+
/** If true, skip loading the embedding model (for testing BM25 fallback). */
|
|
329
|
+
skipModelLoad?: boolean;
|
|
330
|
+
/** Delay (seconds) before loading the embedding model; index + BM25 load first (for testing). */
|
|
331
|
+
modelLoadDelaySeconds?: number;
|
|
332
|
+
/**
|
|
333
|
+
* Factory function that creates the Web Worker.
|
|
334
|
+
* Defaults to the bundled search worker created via `new URL()`.
|
|
335
|
+
* Override when you need a custom worker path (e.g. CDN, service worker proxy).
|
|
336
|
+
*
|
|
337
|
+
* @example
|
|
338
|
+
* ```ts
|
|
339
|
+
* // Vite / webpack 5 (recommended — bundler resolves the path)
|
|
340
|
+
* new SearchClient(url, {
|
|
341
|
+
* workerFactory: () => new Worker(new URL('@ai4data/search/worker', import.meta.url), { type: 'module' })
|
|
342
|
+
* })
|
|
343
|
+
* ```
|
|
344
|
+
*/
|
|
345
|
+
workerFactory?: () => Worker;
|
|
346
|
+
}
|
|
347
|
+
type MessageHandler<T extends WorkerOutboundMessage['type']> = (msg: Extract<WorkerOutboundMessage, {
|
|
348
|
+
type: T;
|
|
349
|
+
}>) => void;
|
|
350
|
+
declare class SearchClient {
|
|
351
|
+
/** True once the index + BM25 corpus are loaded. Lexical search available. */
|
|
352
|
+
isIndexReady: boolean;
|
|
353
|
+
/** True once the ONNX embedding model is ready. Semantic + hybrid search available. */
|
|
354
|
+
isModelReady: boolean;
|
|
355
|
+
/** Latest progress/status message from the worker. */
|
|
356
|
+
loadingMessage: string;
|
|
357
|
+
/** True when the last search fell back to BM25 because the model wasn't ready. */
|
|
358
|
+
activeFallback: boolean;
|
|
359
|
+
/** Parsed collection manifest, available after `index_ready`. */
|
|
360
|
+
manifest: CollectionManifest | null;
|
|
361
|
+
private readonly worker;
|
|
362
|
+
private readonly handlers;
|
|
363
|
+
private destroyed;
|
|
364
|
+
/**
|
|
365
|
+
* @param manifestUrl - Absolute or relative URL to `manifest.json`.
|
|
366
|
+
* Relative URLs are resolved against `location.href`.
|
|
367
|
+
* @param opts - Optional configuration.
|
|
368
|
+
*/
|
|
369
|
+
constructor(manifestUrl: string, opts?: SearchClientOptions);
|
|
370
|
+
/**
|
|
371
|
+
* Subscribe to a specific worker message type.
|
|
372
|
+
* Returns an unsubscribe function — call it to remove the handler.
|
|
373
|
+
*
|
|
374
|
+
* @example
|
|
375
|
+
* ```ts
|
|
376
|
+
* const off = client.on('results', ({ data }) => setResults(data))
|
|
377
|
+
* // later…
|
|
378
|
+
* off()
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
on<T extends WorkerOutboundMessage['type']>(type: T, handler: MessageHandler<T>): () => void;
|
|
382
|
+
/**
|
|
383
|
+
* Submit a search query; results are delivered via the `results` event. No-op if the index is not yet ready.
|
|
384
|
+
*
|
|
385
|
+
* @param text - Natural-language query
|
|
386
|
+
* @param opts - Optional topK, ef, mode ('semantic' | 'lexical' | 'hybrid')
|
|
387
|
+
*/
|
|
388
|
+
search(text: string, opts?: SearchOptions & {
|
|
389
|
+
mode?: SearchMode;
|
|
390
|
+
}): void;
|
|
391
|
+
/**
|
|
392
|
+
* Fetch the most-recent items from the index (useful for pre-search state).
|
|
393
|
+
*/
|
|
394
|
+
getRecent(limit?: number): void;
|
|
395
|
+
/**
|
|
396
|
+
* Ping the worker. Resolves when the worker responds with 'pong'.
|
|
397
|
+
*/
|
|
398
|
+
ping(): Promise<void>;
|
|
399
|
+
/**
|
|
400
|
+
* Terminate the worker and clean up all event listeners.
|
|
401
|
+
* The client is unusable after this call.
|
|
402
|
+
*/
|
|
403
|
+
destroy(): void;
|
|
404
|
+
private _handleMessage;
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
/**
|
|
408
|
+
* flat-engine.ts
|
|
409
|
+
*
|
|
410
|
+
* Brute-force (flat) search engine backed by an Int8-quantized index.
|
|
411
|
+
* Suitable for collections up to ~50 k documents where exact nearest-neighbour
|
|
412
|
+
* search is fast enough without an ANN index structure.
|
|
413
|
+
*/
|
|
414
|
+
|
|
415
|
+
/**
|
|
416
|
+
* Brute-force semantic search engine.
|
|
417
|
+
*
|
|
418
|
+
* Usage:
|
|
419
|
+
* ```ts
|
|
420
|
+
* const engine = new FlatEngine()
|
|
421
|
+
* await engine.load('/data/flat/embeddings.int8.json')
|
|
422
|
+
* const results = engine.search(queryVec, { topK: 10 })
|
|
423
|
+
* ```
|
|
424
|
+
*/
|
|
425
|
+
declare class FlatEngine implements SearchEngine {
|
|
426
|
+
/** Internal item list with Int8-converted vectors */
|
|
427
|
+
private items;
|
|
428
|
+
/** True once `load()` has completed successfully */
|
|
429
|
+
readonly ready: boolean;
|
|
430
|
+
/** Statistics from the most recent `search()` call, or `null` before first search */
|
|
431
|
+
lastStats: SearchStats | null;
|
|
432
|
+
constructor();
|
|
433
|
+
/**
|
|
434
|
+
* Fetch and parse the flat index file, converting all `qv` arrays to `Int8Array`.
|
|
435
|
+
*
|
|
436
|
+
* @param url - URL of the `embeddings.int8.json` index file
|
|
437
|
+
* @returns The raw item list from the JSON (before Int8 conversion)
|
|
438
|
+
*/
|
|
439
|
+
load(url: string): Promise<FlatItem[]>;
|
|
440
|
+
/**
|
|
441
|
+
* Run a brute-force cosine-similarity search over all loaded items.
|
|
442
|
+
*
|
|
443
|
+
* @param queryVec - L2-normalised query embedding (Float32Array)
|
|
444
|
+
* @param opts - Optional search parameters
|
|
445
|
+
* @returns Top-K results sorted by descending score
|
|
446
|
+
* @throws {Error} If called before `load()` has completed
|
|
447
|
+
*/
|
|
448
|
+
search(queryVec: Float32Array, opts?: SearchOptions): SearchResult[];
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
/**
|
|
452
|
+
* hnsw-engine.ts
|
|
453
|
+
*
|
|
454
|
+
* Approximate nearest-neighbour search using a pre-built HNSW index.
|
|
455
|
+
* Upper layers (layers ≥ 1) are held entirely in memory; layer-0 is
|
|
456
|
+
* loaded on demand from shard files via `ShardLoader`.
|
|
457
|
+
*/
|
|
458
|
+
|
|
459
|
+
/** Options accepted by `HNSWEngine.init()` */
|
|
460
|
+
interface HNSWInitOptions {
|
|
461
|
+
/** Cache Storage bucket name forwarded to `ShardLoader` and `fetchJson` */
|
|
462
|
+
cacheName?: string;
|
|
463
|
+
/** Parsed manifest; used to resolve index file paths and compressed flag */
|
|
464
|
+
manifest?: CollectionManifest | null;
|
|
465
|
+
}
|
|
466
|
+
/**
|
|
467
|
+
* HNSW approximate nearest-neighbour search engine.
|
|
468
|
+
*
|
|
469
|
+
* Typical usage:
|
|
470
|
+
* ```ts
|
|
471
|
+
* const engine = new HNSWEngine()
|
|
472
|
+
* await engine.init('/data/prwp/')
|
|
473
|
+
* const results = await engine.search(queryVec, { topK: 10, ef: 50 })
|
|
474
|
+
* ```
|
|
475
|
+
*/
|
|
476
|
+
declare class HNSWEngine implements SearchEngine {
|
|
477
|
+
/** Parsed `index/config.json` */
|
|
478
|
+
private config;
|
|
479
|
+
/** Parsed `index/upper_layers.json` */
|
|
480
|
+
private upperLayers;
|
|
481
|
+
/** Maps string node ID → shard ID */
|
|
482
|
+
private nodeToShard;
|
|
483
|
+
/** Shard loader for layer-0 data */
|
|
484
|
+
private loader;
|
|
485
|
+
/** In-memory node cache (Int8 vectors, neighbours) */
|
|
486
|
+
private nodeCache;
|
|
487
|
+
/** True once `init()` has completed successfully */
|
|
488
|
+
readonly ready: boolean;
|
|
489
|
+
/** Statistics from the most recent `search()` call, or `null` before first search */
|
|
490
|
+
lastStats: SearchStats | null;
|
|
491
|
+
constructor();
|
|
492
|
+
/**
|
|
493
|
+
* Load all index metadata and populate the upper-layer node cache.
|
|
494
|
+
* This must be called (and awaited) before any call to `search()`.
|
|
495
|
+
*
|
|
496
|
+
* @param baseUrl - Base URL of the collection directory (e.g. `/data/prwp/`)
|
|
497
|
+
* @param opts - Optional cache name and manifest
|
|
498
|
+
*/
|
|
499
|
+
init(baseUrl: string, opts?: HNSWInitOptions): Promise<void>;
|
|
500
|
+
/**
|
|
501
|
+
* Search the HNSW index for the nearest neighbours of `queryVec`.
|
|
502
|
+
*
|
|
503
|
+
* @param queryVec - L2-normalised query embedding (Float32Array)
|
|
504
|
+
* @param opts - Optional search parameters (`topK`, `ef`, `ef_upper`)
|
|
505
|
+
* @returns Top-K results sorted by descending score
|
|
506
|
+
* @throws {Error} If called before `init()` has completed
|
|
507
|
+
*/
|
|
508
|
+
search(queryVec: Float32Array, opts?: SearchOptions): Promise<SearchResult[]>;
|
|
509
|
+
/**
|
|
510
|
+
* Single-layer greedy beam descent for layers ≥ 1 (upper layers).
|
|
511
|
+
* All nodes at these layers are already in `nodeCache`.
|
|
512
|
+
*
|
|
513
|
+
* @param queryVec - L2-normalised query vector
|
|
514
|
+
* @param entryPoints - Current best candidates as `[score, nodeId]` tuples
|
|
515
|
+
* @param layer - Layer index to traverse
|
|
516
|
+
* @param ef_upper - Beam width (number of candidates to keep)
|
|
517
|
+
* @returns Updated candidate list for the next layer
|
|
518
|
+
*/
|
|
519
|
+
private _beamDescentLayer;
|
|
520
|
+
/**
|
|
521
|
+
* Score a node that is present in `nodeCache` (upper-layer or already loaded layer-0).
|
|
522
|
+
*
|
|
523
|
+
* @param queryVec - L2-normalised query vector
|
|
524
|
+
* @param nodeId - Node to score
|
|
525
|
+
* @returns Approximate dot-product similarity, or `-Infinity` if node is absent
|
|
526
|
+
*/
|
|
527
|
+
private _scoreUpperNode;
|
|
528
|
+
/**
|
|
529
|
+
* Layer-0 beam search. Loads shard files on demand as the search frontier expands.
|
|
530
|
+
*
|
|
531
|
+
* @param queryVec - L2-normalised query vector
|
|
532
|
+
* @param entryPoints - Entry candidates from upper-layer descent
|
|
533
|
+
* @param ef - Beam width (number of candidates to maintain in `W`)
|
|
534
|
+
* @returns All candidates in `W` sorted by descending score as `SearchResult` objects
|
|
535
|
+
*/
|
|
536
|
+
private _beamSearchLayer0;
|
|
537
|
+
/**
|
|
538
|
+
* Retrieve a layer-0 node from cache, loading its shard file if necessary.
|
|
539
|
+
* Once loaded, the node entry in `nodeCache` is augmented with `neighbors`
|
|
540
|
+
* and `_l0loaded = true`.
|
|
541
|
+
*
|
|
542
|
+
* @param nodeId - Node to retrieve
|
|
543
|
+
* @returns Fully populated cache entry, or `null` if the node cannot be found
|
|
544
|
+
*/
|
|
545
|
+
private _getLayer0Node;
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
/**
|
|
549
|
+
* hybrid-search.ts
|
|
550
|
+
*
|
|
551
|
+
* Combines semantic (HNSW / flat) and lexical (BM25) search results using
|
|
552
|
+
* min-max normalisation and a configurable linear blend.
|
|
553
|
+
*/
|
|
554
|
+
|
|
555
|
+
/** Options accepted by `HybridSearch.search()` */
|
|
556
|
+
interface HybridSearchOptions {
|
|
557
|
+
/** Number of results to return (default: 20) */
|
|
558
|
+
topK?: number;
|
|
559
|
+
/** Weight applied to normalised semantic scores (default: 0.7) */
|
|
560
|
+
semanticWeight?: number;
|
|
561
|
+
/** Weight applied to normalised BM25 scores (default: 0.3) */
|
|
562
|
+
lexicalWeight?: number;
|
|
563
|
+
/** HNSW beam width forwarded to the semantic engine (default: 50) */
|
|
564
|
+
ef?: number;
|
|
565
|
+
/** Search mode: `'semantic'`, `'lexical'`, or `'hybrid'` (default: `'hybrid'`) */
|
|
566
|
+
mode?: 'semantic' | 'lexical' | 'hybrid';
|
|
567
|
+
}
|
|
568
|
+
/**
|
|
569
|
+
* Hybrid search combining a semantic vector engine and an optional BM25 engine.
|
|
570
|
+
*
|
|
571
|
+
* In `'hybrid'` mode both engines are queried in parallel; scores are
|
|
572
|
+
* min-max normalised independently and then linearly blended.
|
|
573
|
+
*
|
|
574
|
+
* Example:
|
|
575
|
+
* ```ts
|
|
576
|
+
* const hybrid = new HybridSearch(hnswEngine, bm25Engine, id => titlesMap[id])
|
|
577
|
+
* const results = await hybrid.search(queryVec, 'development finance', { topK: 10 })
|
|
578
|
+
* ```
|
|
579
|
+
*/
|
|
580
|
+
declare class HybridSearch {
|
|
581
|
+
private readonly semantic;
|
|
582
|
+
private readonly bm25;
|
|
583
|
+
private readonly idToMeta;
|
|
584
|
+
/**
|
|
585
|
+
* @param semanticEngine - Initialised `SearchEngine` (FlatEngine or HNSWEngine)
|
|
586
|
+
* @param bm25Engine - Optional BM25 engine; pass `null` to disable lexical search
|
|
587
|
+
* @param idToMeta - Optional callback to look up display metadata by document ID
|
|
588
|
+
*/
|
|
589
|
+
constructor(semanticEngine: SearchEngine, bm25Engine?: BM25Engine | null, idToMeta?: ((id: number | string) => Partial<SearchResult>) | null);
|
|
590
|
+
/**
|
|
591
|
+
* Run a hybrid (or single-mode) search query.
|
|
592
|
+
*
|
|
593
|
+
* @param queryVec - L2-normalised query embedding, or `null` for lexical-only mode
|
|
594
|
+
* @param queryText - Raw query string for BM25, or empty string for semantic-only mode
|
|
595
|
+
* @param opts - Search options
|
|
596
|
+
* @returns Top-K results sorted by descending combined score
|
|
597
|
+
*/
|
|
598
|
+
search(queryVec: Float32Array | null, queryText: string, opts?: HybridSearchOptions): Promise<SearchResult[]>;
|
|
599
|
+
/**
|
|
600
|
+
* Run the BM25 engine and map raw `[docIdx, score]` tuples to `SearchResult` objects.
|
|
601
|
+
*
|
|
602
|
+
* @param queryText - Raw query string
|
|
603
|
+
* @param topK - Maximum number of results to return
|
|
604
|
+
* @returns BM25 results as `SearchResult` objects (score order: descending)
|
|
605
|
+
*/
|
|
606
|
+
private _runBM25;
|
|
607
|
+
/**
|
|
608
|
+
* Format single-mode results, adding the appropriate `semanticScore` /
|
|
609
|
+
* `lexicalScore` fields expected by callers.
|
|
610
|
+
*
|
|
611
|
+
* @param results - Raw results from one engine
|
|
612
|
+
* @param topK - Slice limit
|
|
613
|
+
* @param source - Which engine produced the results
|
|
614
|
+
* @returns Results annotated with zeroed-out score fields for the unused engine
|
|
615
|
+
*/
|
|
616
|
+
private _formatResults;
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
/**
|
|
620
|
+
* fetch-json.ts
|
|
621
|
+
*
|
|
622
|
+
* Utility for fetching JSON (plain or gzip-compressed) with optional
|
|
623
|
+
* Cache Storage read/write so repeat cold-starts skip the network.
|
|
624
|
+
*/
|
|
625
|
+
interface FetchJsonOptions {
|
|
626
|
+
/**
|
|
627
|
+
* When provided, the response is read from (and written to) a named
|
|
628
|
+
* Cache Storage bucket. Pass `null` to disable caching entirely.
|
|
629
|
+
*/
|
|
630
|
+
cacheName?: string | null;
|
|
631
|
+
}
|
|
632
|
+
/**
|
|
633
|
+
* Fetch a JSON resource, transparently handling gzip-compressed responses.
|
|
634
|
+
*
|
|
635
|
+
* Caching behaviour:
|
|
636
|
+
* 1. If `cacheName` is set and the Cache API is available, attempt a cache hit.
|
|
637
|
+
* 2. On a miss, fetch from the network.
|
|
638
|
+
* 3. Decompress if the URL ends with `.gz` and the HTTP layer did not already
|
|
639
|
+
* decompress it (i.e. `Content-Encoding` is absent or non-gzip).
|
|
640
|
+
* 4. Write the parsed object back to Cache Storage for future requests.
|
|
641
|
+
*
|
|
642
|
+
* @param url - Absolute or relative URL to fetch
|
|
643
|
+
* @param opts - Optional caching configuration
|
|
644
|
+
* @returns Parsed JSON payload cast to `T`
|
|
645
|
+
* @throws {Error} On non-2xx HTTP responses
|
|
646
|
+
*/
|
|
647
|
+
declare function fetchJson<T = unknown>(url: string, opts?: FetchJsonOptions): Promise<T>;
|
|
648
|
+
|
|
649
|
+
/**
|
|
650
|
+
* int8-codec.ts
|
|
651
|
+
*
|
|
652
|
+
* Quantization scheme: vectors are stored as Int8 values in [-127, 127].
|
|
653
|
+
* Each vector is accompanied by a scalar `scale` such that the original
|
|
654
|
+
* float value ≈ int8_value * scale. Dot products are computed in mixed
|
|
655
|
+
* precision (Float32 query × dequantized Int8 stored vector) to keep
|
|
656
|
+
* both accuracy and memory efficiency.
|
|
657
|
+
*/
|
|
658
|
+
/**
|
|
659
|
+
* Compute the dot product between a Float32 query vector and a stored
|
|
660
|
+
* Int8-quantized vector, dequantizing on the fly.
|
|
661
|
+
*
|
|
662
|
+
* @param queryF32 - L2-normalised query vector (Float32Array)
|
|
663
|
+
* @param storedQV - Int8-quantized stored vector
|
|
664
|
+
* @param storedScale - Per-vector dequantization scale factor
|
|
665
|
+
* @returns Approximate cosine similarity score
|
|
666
|
+
*/
|
|
667
|
+
declare function dotProductMixed(queryF32: Float32Array, storedQV: Int8Array, storedScale: number): number;
|
|
668
|
+
/**
|
|
669
|
+
* Dequantize an Int8 vector back to Float32 using the stored scale.
|
|
670
|
+
*
|
|
671
|
+
* @param qv - Int8-quantized vector
|
|
672
|
+
* @param scale - Per-vector dequantization scale factor
|
|
673
|
+
* @returns Reconstructed Float32 vector
|
|
674
|
+
*/
|
|
675
|
+
declare function dequantize(qv: Int8Array, scale: number): Float32Array;
|
|
676
|
+
/**
|
|
677
|
+
* L2-normalise a Float32 vector in place.
|
|
678
|
+
* Vectors whose norm is below 1e-9 are left unchanged to avoid division by zero.
|
|
679
|
+
*
|
|
680
|
+
* @param vec - Vector to normalise (mutated in place)
|
|
681
|
+
* @returns The same (now normalised) vector
|
|
682
|
+
*/
|
|
683
|
+
declare function l2NormalizeInPlace(vec: Float32Array): Float32Array;
|
|
684
|
+
/**
|
|
685
|
+
* Convert a plain number array (or an existing Int8Array) to an Int8Array.
|
|
686
|
+
* Values outside [-128, 127] are not clamped; the typed-array constructor silently wraps them modulo 256 (e.g. 128 becomes -128).
|
|
687
|
+
*
|
|
688
|
+
* @param arr - Source values
|
|
689
|
+
* @returns An Int8Array view / copy of the input
|
|
690
|
+
*/
|
|
691
|
+
declare function toInt8Array(arr: number[] | Int8Array): Int8Array;
|
|
692
|
+
|
|
693
|
+
export { type BM25CorpusEntry, type BM25Engine, type CollectionManifest, FlatEngine, type FlatIndexConfig, type FlatItem, type GeographicCoverage, type HNSWConfig, HNSWEngine, type HNSWIndexConfig, HybridSearch, SearchClient, type SearchClientOptions, type SearchEngine, type SearchMode, type SearchOptions, type SearchResult, type SearchStats, type Shard, type ShardNode, type WorkerErrorMessage, type WorkerInboundMessage, type WorkerIndexReadyMessage, type WorkerInitMessage, type WorkerOutboundMessage, type WorkerProgressMessage, type WorkerReadyMessage, type WorkerResultsMessage, type WorkerSearchMessage, dequantize, dotProductMixed, fetchJson, l2NormalizeInPlace, toInt8Array };
|