albex 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +191 -0
- package/README.md +30 -19
- package/dist/albex-worker.d.ts +65 -2
- package/dist/albex-worker.d.ts.map +1 -1
- package/dist/albex-worker.js +97 -20
- package/dist/albex-worker.js.map +1 -1
- package/dist/albex.d.ts +206 -42
- package/dist/albex.d.ts.map +1 -1
- package/dist/albex.js +384 -103
- package/dist/albex.js.map +1 -1
- package/dist/errors.d.ts +35 -4
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +37 -2
- package/dist/errors.js.map +1 -1
- package/dist/persistence.js +1 -1
- package/dist/pool/coordinator.d.ts +14 -6
- package/dist/pool/coordinator.d.ts.map +1 -1
- package/dist/pool/coordinator.js +65 -28
- package/dist/pool/coordinator.js.map +1 -1
- package/dist/profile.js +1 -1
- package/dist/resource-manager.js +1 -1
- package/dist/tiered-store.js +1 -1
- package/dist/wasm-bindings.d.ts +50 -1
- package/dist/wasm-bindings.d.ts.map +1 -1
- package/dist/wasm-bindings.js +19 -11
- package/dist/wasm-bindings.js.map +1 -1
- package/dist/worker-protocol.d.ts +23 -2
- package/dist/worker-protocol.d.ts.map +1 -1
- package/dist/worker-protocol.js +1 -1
- package/dist/worker-runtime.js +16 -1
- package/dist/worker-runtime.js.map +1 -1
- package/package.json +1 -1
- package/src/albex-worker.ts +103 -18
- package/src/albex.ts +2937 -2524
- package/src/errors.ts +49 -4
- package/src/pool/coordinator.ts +61 -34
- package/src/wasm-bindings.ts +78 -12
- package/src/worker-protocol.ts +12 -2
- package/src/worker-runtime.ts +16 -1
- package/wasm/pkg/albex_pdf.wasm +0 -0
- package/wasm/pkg/albex_wasm.wasm +0 -0
- package/wasm/pkg/albex_wasm_bg.wasm +0 -0
- package/wasm/pkg/albex_wasm_simd.wasm +0 -0
package/dist/albex.d.ts
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
* const results = engine.search('contrato marco');
|
|
14
14
|
* ```
|
|
15
15
|
*/
|
|
16
|
-
import { type Tier } from './profile.js';
|
|
17
16
|
export { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
|
|
18
17
|
export { listPersisted, deletePersisted } from './persistence.js';
|
|
19
18
|
export { detectProfile, pickTier, pickWorkerCount, shouldUseGpu } from './profile.js';
|
|
@@ -25,29 +24,64 @@ export type { AlbexPoolOptions } from './pool/coordinator.js';
|
|
|
25
24
|
export { BloomGpu, packBloomsFromChunks } from './gpu/bloom-runtime.js';
|
|
26
25
|
export { TieredStore } from './tiered-store.js';
|
|
27
26
|
export type { TieredStoreOptions } from './tiered-store.js';
|
|
27
|
+
/**
|
|
28
|
+
* Explicit engine capacity configuration. All fields optional — anything
|
|
29
|
+
* missing is completed from the std defaults, scaled to keep the std
|
|
30
|
+
* ratios when a related field IS provided (documented per field).
|
|
31
|
+
*
|
|
32
|
+
* Estimated WASM memory ≈ `maxChunks × 64 B` (32 B descriptor + 32 B
|
|
33
|
+
* trigram signature) `+ textPoolBytes + namePoolBytes + maxDocs × 28 B`,
|
|
34
|
+
* on top of the engine's fixed ~80 KB of scratch buffers. The std preset
|
|
35
|
+
* is ~22 MB; `'large'` is ~180 MB. WASM linear memory never shrinks: the
|
|
36
|
+
* high-water mark of the largest capacity ever initialised in a given
|
|
37
|
+
* engine stays committed until the engine is disposed.
|
|
38
|
+
*/
|
|
39
|
+
export interface AlbexCapacityConfig {
|
|
40
|
+
/** Maximum number of live documents. Default 128. Range 1–65 536. */
|
|
41
|
+
maxDocs?: number;
|
|
42
|
+
/** Maximum number of indexed chunks (≈ paragraphs; long paragraphs split
|
|
43
|
+
* every 512 bytes). Default `max(maxDocs × 782, 1024)` — the std ratio
|
|
44
|
+
* (100 000 chunks / 128 docs). Range maxDocs–4 194 304. */
|
|
45
|
+
maxChunks?: number;
|
|
46
|
+
/** Text pool size in bytes (total UTF-8 text the index can hold).
|
|
47
|
+
* Default `max(maxChunks × 168, 64 KiB)` — the std ratio (16 MiB /
|
|
48
|
+
* 100 000 chunks). Range 4 KiB–1 GiB. */
|
|
49
|
+
textPoolBytes?: number;
|
|
50
|
+
/** Filename pool size in bytes. Default `max(maxDocs × 256, 4 KiB)` —
|
|
51
|
+
* the std ratio (32 KiB / 128 docs). Range 256 B–16 MiB. */
|
|
52
|
+
namePoolBytes?: number;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Engine capacity (decision A16 — runtime capacity, single binary):
|
|
56
|
+
* `'std'` — 128 docs · 100k chunks · 16 MB text · 32 KB names (~22 MB).
|
|
57
|
+
* The default, identical to every previous release.
|
|
58
|
+
* `'large'` — 1 024 docs · 800k chunks · 128 MB text · 256 KB names
|
|
59
|
+
* (~180 MB) — the old compile-time "pro" tier.
|
|
60
|
+
* object — explicit {@link AlbexCapacityConfig}.
|
|
61
|
+
*/
|
|
62
|
+
export type AlbexCapacity = 'std' | 'large' | AlbexCapacityConfig;
|
|
28
63
|
export interface AlbexOptions {
|
|
29
64
|
/**
|
|
30
|
-
* Explicit URL to the main WASM binary.
|
|
31
|
-
*
|
|
32
|
-
* If you want automatic tier selection (mini/std/pro chosen from
|
|
33
|
-
* `deviceMemory`), pass `wasmBaseUrl` instead — the engine will fetch
|
|
34
|
-
* `albex_wasm_<tier>.wasm` from that directory.
|
|
65
|
+
* Explicit URL to the main WASM binary. If omitted, the baseline (or
|
|
66
|
+
* SIMD, when supported) binary packaged with albex is used.
|
|
35
67
|
*/
|
|
36
68
|
wasmUrl?: string;
|
|
37
69
|
/**
|
|
38
|
-
* Base directory containing
|
|
39
|
-
* `
|
|
70
|
+
* Base directory containing the two binaries (`albex_wasm.wasm`,
|
|
71
|
+
* `albex_wasm_simd.wasm`). Used when `wasmUrl` is omitted.
|
|
40
72
|
*/
|
|
41
73
|
wasmBaseUrl?: string;
|
|
42
74
|
/** URL to albex_pdf.wasm. Required only if you call indexFile() with PDFs. */
|
|
43
75
|
pdfWasmUrl?: string;
|
|
44
76
|
/**
|
|
45
|
-
*
|
|
46
|
-
*
|
|
77
|
+
* Engine capacity. `'std'` (default) keeps the historical limits;
|
|
78
|
+
* `'large'` raises them to 1 024 docs / 800k chunks / 128 MB text; an
|
|
79
|
+
* object configures each pool explicitly (see {@link AlbexCapacityConfig}
|
|
80
|
+
* for defaults, ranges and the memory cost model). Replaces the removed
|
|
81
|
+
* compile-time `tier` option — capacity is now a runtime parameter of a
|
|
82
|
+
* single binary (`initWithCapacity`, ABI 7).
|
|
47
83
|
*/
|
|
48
|
-
|
|
49
|
-
* pass `'auto'` or omit. Other values are accepted and ignored. */
|
|
50
|
-
tier?: 'auto' | 'mini' | 'std' | 'pro';
|
|
84
|
+
capacity?: AlbexCapacity;
|
|
51
85
|
/**
|
|
52
86
|
* SIMD selection. When `'auto'` (default), Albex probes for v128 support
|
|
53
87
|
* and fetches the `_simd.wasm` variant when available. Pass `'off'` to
|
|
@@ -68,6 +102,16 @@ export interface AlbexOptions {
|
|
|
68
102
|
* the upload + dispatch overhead is bigger than the speedup. Default: 20_000.
|
|
69
103
|
*/
|
|
70
104
|
gpuThreshold?: number;
|
|
105
|
+
/**
|
|
106
|
+
* Maximum size (in bytes) of a file accepted by `indexFile`. Checked
|
|
107
|
+
* against `File.size` BEFORE the file is read, so an oversized input is
|
|
108
|
+
* rejected with a typed `AlbexCapacityError` (`limit: 'file'`) without
|
|
109
|
+
* ever being buffered or hashed. Default: 256 MiB — above any default
|
|
110
|
+
* text pool (16 MB std / 128 MB large), so this only stops pathological
|
|
111
|
+
* inputs (e.g. a 2 GB log file) from exhausting tab memory. Raise it if
|
|
112
|
+
* you configure a custom `capacity` with a text pool beyond 256 MiB.
|
|
113
|
+
*/
|
|
114
|
+
maxFileBytes?: number;
|
|
71
115
|
}
|
|
72
116
|
export interface IndexedDocument {
|
|
73
117
|
name: string;
|
|
@@ -80,27 +124,77 @@ export interface IndexedDocument {
|
|
|
80
124
|
/** 64-bit FNV-1a hex of the source file bytes. Stable across runs. */
|
|
81
125
|
contentHash: string;
|
|
82
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* One authoritative chunk of an indexed document — the exact unit Albex
|
|
129
|
+
* indexed and searches over. Returned by {@link AlbexEngine.listChunks} so a
|
|
130
|
+
* host can mirror Albex's chunking (e.g. to build a parallel vector index over
|
|
131
|
+
* the same units). A long paragraph is split into several ≤512-byte chunks that
|
|
132
|
+
* share a `location` but differ in `sub`.
|
|
133
|
+
*/
|
|
134
|
+
export interface AuthoritativeChunk {
|
|
135
|
+
/** WASM-side stable document id (survives compact()). */
|
|
136
|
+
docId: number;
|
|
137
|
+
/** Paragraph index (DOCX/TXT) or page number (PDF, 1-based). */
|
|
138
|
+
location: number;
|
|
139
|
+
/** Ordinal of this chunk within its document (0-based, compact()-stable). */
|
|
140
|
+
ord: number;
|
|
141
|
+
/** Ordinal of this chunk within its `location` group (0-based, informational). */
|
|
142
|
+
sub: number;
|
|
143
|
+
/** The exact UTF-8 text Albex indexed for this chunk. */
|
|
144
|
+
text: string;
|
|
145
|
+
/** Byte length of `text`. */
|
|
146
|
+
byteLen: number;
|
|
147
|
+
/**
|
|
148
|
+
* Canonical shared id `"<docId>::<ord>"`. Identical to the matching
|
|
149
|
+
* {@link SearchResult.chunkId}, and stable across compact() and snapshot
|
|
150
|
+
* save/load — so it is safe to persist alongside parallel structures
|
|
151
|
+
* (unlike the absolute chunk index, which compact() renumbers).
|
|
152
|
+
*/
|
|
153
|
+
id: string;
|
|
154
|
+
}
|
|
83
155
|
export interface MatchSpan {
|
|
84
|
-
/**
|
|
156
|
+
/** UTF-8 **byte** offset within `snippet` where this matched token begins.
|
|
157
|
+
* NOT a JS string index — for `snippet.slice()` use the UTF-16
|
|
158
|
+
* {@link SearchResult.snippetStart}/{@link SearchResult.snippetEnd} of the
|
|
159
|
+
* primary span instead. */
|
|
85
160
|
start: number;
|
|
86
|
-
/**
|
|
161
|
+
/** UTF-8 **byte** offset within `snippet` where this matched token ends
|
|
162
|
+
* (exclusive). See the note on `start`. */
|
|
87
163
|
end: number;
|
|
88
164
|
}
|
|
89
165
|
export interface SearchResult {
|
|
90
166
|
documentName: string;
|
|
167
|
+
/** WASM-side stable document id (survives compact()). Matches
|
|
168
|
+
* `AuthoritativeChunk.docId` from {@link AlbexEngine.listChunks}. */
|
|
169
|
+
docId: number;
|
|
91
170
|
/** Paragraph index (DOCX/TXT) or page number (PDF, 1-based). */
|
|
92
171
|
location: number;
|
|
172
|
+
/** Canonical chunk id `"<docId>::<ord>"` — identical to the matching
|
|
173
|
+
* {@link AuthoritativeChunk.id}, so a host can fuse search hits with a
|
|
174
|
+
* parallel index (e.g. embeddings) on this key. Stable across compact(). */
|
|
175
|
+
chunkId: string;
|
|
93
176
|
/** Relevance score 0–1000. */
|
|
94
177
|
score: number;
|
|
95
178
|
/** Snippet text. With `windowed` search options this is a substring with
|
|
96
179
|
* ASCII ellipsis sentinels (`"... "` / `" ..."`) the UI should render
|
|
97
180
|
* as `…`. Without windowing, the full chunk text. */
|
|
98
181
|
snippet: string;
|
|
99
|
-
/** Primary token match
|
|
182
|
+
/** Primary token match as UTF-8 **byte** offsets within the encoded
|
|
183
|
+
* snippet (kept for backwards compatibility; equal to `matches[0]`).
|
|
184
|
+
* Byte offsets drift from JS string indices as soon as the snippet
|
|
185
|
+
* contains accents/ñ/emoji — use {@link snippetStart}/{@link snippetEnd}
|
|
186
|
+
* for `snippet.slice()` / UI highlighting. */
|
|
100
187
|
matchStart: number;
|
|
101
188
|
matchEnd: number;
|
|
102
|
-
/** All matched token spans within `snippet
|
|
189
|
+
/** All matched token spans within `snippet` as UTF-8 **byte** offsets,
|
|
190
|
+
* in query order. Length 1–4. */
|
|
103
191
|
matches: MatchSpan[];
|
|
192
|
+
/** Primary token match start as a UTF-16 code-unit index into `snippet` —
|
|
193
|
+
* safe to pass directly to `snippet.slice(snippetStart, snippetEnd)`. */
|
|
194
|
+
snippetStart: number;
|
|
195
|
+
/** Primary token match end (exclusive) as a UTF-16 code-unit index into
|
|
196
|
+
* `snippet`. */
|
|
197
|
+
snippetEnd: number;
|
|
104
198
|
}
|
|
105
199
|
/**
|
|
106
200
|
* Options that change how snippets are produced. Both fields are optional.
|
|
@@ -127,15 +221,17 @@ export interface SearchOptions {
|
|
|
127
221
|
export interface EngineStats {
|
|
128
222
|
documents: number;
|
|
129
223
|
chunks: number;
|
|
224
|
+
/** Bytes of indexed text currently in the text pool. */
|
|
130
225
|
textUsed: number;
|
|
226
|
+
/** RUNTIME text pool capacity in bytes (= resolved `textPoolBytes`). */
|
|
131
227
|
textCapacity: number;
|
|
132
228
|
wasmMemoryBytes: number;
|
|
133
|
-
/**
|
|
134
|
-
tier: Tier | null;
|
|
135
|
-
/** Compile-time chunk capacity for the loaded tier. */
|
|
229
|
+
/** RUNTIME chunk capacity the engine was initialised with. */
|
|
136
230
|
maxChunks: number;
|
|
137
|
-
/**
|
|
231
|
+
/** RUNTIME document capacity the engine was initialised with. */
|
|
138
232
|
maxDocs: number;
|
|
233
|
+
/** RUNTIME filename pool capacity in bytes. */
|
|
234
|
+
namePoolBytes: number;
|
|
139
235
|
}
|
|
140
236
|
export interface SearchStats {
|
|
141
237
|
query: string;
|
|
@@ -144,6 +240,14 @@ export interface SearchStats {
|
|
|
144
240
|
bloomTested: number;
|
|
145
241
|
bloomPassed: number;
|
|
146
242
|
bitapMatched: number;
|
|
243
|
+
/** True if the query had more than 8 OR branches — the extras were
|
|
244
|
+
* discarded and did not contribute results. */
|
|
245
|
+
truncatedBranches?: boolean;
|
|
246
|
+
/** True if tokens were dropped (more than 4 per branch) or clipped
|
|
247
|
+
* (longer than 64 bytes). */
|
|
248
|
+
truncatedTokens?: boolean;
|
|
249
|
+
/** True if the raw query exceeded 1024 bytes and was cut. */
|
|
250
|
+
truncatedQuery?: boolean;
|
|
147
251
|
}
|
|
148
252
|
/**
|
|
149
253
|
* One structured warning recorded by the engine during indexFile or
|
|
@@ -232,22 +336,34 @@ export declare class AlbexEngine {
|
|
|
232
336
|
private _pdfMem;
|
|
233
337
|
private _docs;
|
|
234
338
|
private _lastSearch;
|
|
339
|
+
/** Raw truncation bitflags from the most recent prepareQuery (ABI 5):
|
|
340
|
+
* 1 = branches dropped, 2 = tokens dropped/clipped, 4 = query bytes cut.
|
|
341
|
+
* Captured right after prepareQuery so every _lastSearch built for that
|
|
342
|
+
* query (including per-branch OR runs) reports the same flags. */
|
|
343
|
+
private _lastTruncFlags;
|
|
235
344
|
/** Structured diagnostics collected during the most recent operation.
|
|
236
345
|
* Drained by `takeDiagnostics()`. Capped at 256 entries to avoid
|
|
237
346
|
* unbounded memory growth in pathological cases (very corrupted
|
|
238
347
|
* corpora producing thousands of recovery warnings). */
|
|
239
348
|
private _diagnostics;
|
|
240
|
-
|
|
349
|
+
/** Resolved runtime capacity (set in init(); reused by reset()). */
|
|
350
|
+
private _capacity;
|
|
241
351
|
private _simd;
|
|
242
352
|
private _profile;
|
|
243
353
|
private _resources;
|
|
244
354
|
private _gpu;
|
|
245
|
-
|
|
355
|
+
/** True when the GPU-resident Bloom array no longer mirrors the WASM
|
|
356
|
+
* chunk array. Set by EVERY index mutation (indexFile, removeDocument,
|
|
357
|
+
* compact, reset, load) and cleared after a successful upload. A plain
|
|
358
|
+
* chunk-count comparison is NOT enough: compact() can reorder blooms
|
|
359
|
+
* while keeping the count identical, which would silently filter the
|
|
360
|
+
* wrong chunks (audit 1.5). */
|
|
361
|
+
private _gpuUploadDirty;
|
|
246
362
|
private _unsubscribeResources;
|
|
247
363
|
private readonly _opts;
|
|
248
364
|
private _opChain;
|
|
249
365
|
private _busy;
|
|
250
|
-
constructor(opts
|
|
366
|
+
constructor(opts?: AlbexOptions);
|
|
251
367
|
/** Serialize an async engine operation behind any in-flight one. */
|
|
252
368
|
private _exclusive;
|
|
253
369
|
/** Guard a synchronous mutator/search: refuse to run mid-async-operation
|
|
@@ -256,30 +372,32 @@ export declare class AlbexEngine {
|
|
|
256
372
|
/** Compact opportunistically when tombstones pile up under text pressure,
|
|
257
373
|
* so repeated removeDocument/replaceDocument don't exhaust the pool. */
|
|
258
374
|
private _autoCompactIfNeeded;
|
|
259
|
-
/**
|
|
375
|
+
/**
|
|
376
|
+
* Load and initialise the main WASM module. Must be called before any
|
|
377
|
+
* other method.
|
|
378
|
+
*
|
|
379
|
+
* Resolves `opts.capacity` ('std' default · 'large' · explicit object)
|
|
380
|
+
* and sizes the WASM pools accordingly via `initWithCapacity` (ABI 7).
|
|
381
|
+
* Memory cost ≈ `maxChunks × 64 B + textPoolBytes + namePoolBytes` —
|
|
382
|
+
* ~22 MB for 'std', ~180 MB for 'large'. Throws `AlbexInitError` if the
|
|
383
|
+
* requested capacity is out of range or the allocation fails.
|
|
384
|
+
*/
|
|
260
385
|
init(): Promise<void>;
|
|
261
386
|
/**
|
|
262
387
|
* Decide which `.wasm` binary to fetch. Order of precedence:
|
|
263
|
-
* 1. `opts.wasmUrl` if provided — used verbatim.
|
|
264
|
-
* 2. `opts.tier` if explicit — joined with `wasmBaseUrl`.
|
|
265
|
-
* 3. `opts.wasmBaseUrl` + tier picked from the device profile.
|
|
266
|
-
*
|
|
267
|
-
* Order of precedence:
|
|
268
388
|
* 1. `opts.wasmUrl` literal → use verbatim
|
|
269
|
-
* 2. `opts.wasmBaseUrl` +
|
|
389
|
+
* 2. `opts.wasmBaseUrl` + simd suffix → fetched from that directory
|
|
270
390
|
* 3. zero-config default → `albex_wasm_bg.wasm` packaged
|
|
271
391
|
* next to this file, resolved
|
|
272
392
|
* via `import.meta.url`
|
|
273
393
|
*
|
|
274
|
-
*
|
|
275
|
-
*
|
|
276
|
-
*
|
|
277
|
-
*
|
|
278
|
-
*
|
|
394
|
+
* There are exactly two main binaries (baseline + SIMD); capacity is a
|
|
395
|
+
* RUNTIME parameter since ABI 7, so it never affects which file is
|
|
396
|
+
* fetched. SIMD auto-detection is only active when `wasmBaseUrl` is
|
|
397
|
+
* given, because picking a URL at runtime would defeat any bundler's
|
|
398
|
+
* static asset rewriting.
|
|
279
399
|
*/
|
|
280
400
|
private _resolveWasmUrl;
|
|
281
|
-
/** The tier that was actually loaded. `null` until `init()` resolves. */
|
|
282
|
-
get tier(): Tier | null;
|
|
283
401
|
/** True if the SIMD-accelerated binary was loaded. */
|
|
284
402
|
get simdEnabled(): boolean;
|
|
285
403
|
/** True if a WebGPU device is acquired and the next search will use it. */
|
|
@@ -302,12 +420,23 @@ export declare class AlbexEngine {
|
|
|
302
420
|
* No-op if the GPU device hasn't been acquired yet — first call attempts
|
|
303
421
|
* `init()` lazily; if that fails, the candidate path is permanently
|
|
304
422
|
* disabled for this engine instance.
|
|
423
|
+
*
|
|
424
|
+
* IMPORTANT: this method CLOBBERS the scratchpad (the candidate bitset
|
|
425
|
+
* is pushed through it via `setCandidateMask`). Any pattern previously
|
|
426
|
+
* staged by `selectQueryBranch` is destroyed — the caller MUST re-select
|
|
427
|
+
* the active branch before calling `searchBegin`, which snapshots the
|
|
428
|
+
* pattern from the scratchpad (audit 1.2).
|
|
305
429
|
*/
|
|
306
430
|
private _gpuPreFilter;
|
|
307
431
|
private _u8;
|
|
308
432
|
private _writePad;
|
|
309
433
|
private _writeStr;
|
|
310
434
|
private _readPad;
|
|
435
|
+
/** Copy `n` scratchpad bytes out of WASM memory. The copy is private to
|
|
436
|
+
* JS, so it survives later WASM calls (and memory growth) — used when the
|
|
437
|
+
* caller needs both the raw bytes (UTF-16 span mapping) and the decoded
|
|
438
|
+
* string of the same payload. */
|
|
439
|
+
private _readPadBytes;
|
|
311
440
|
private _feedText;
|
|
312
441
|
/**
|
|
313
442
|
* Compute the FNV-1a 64-bit content hash of `bytes` via the WASM
|
|
@@ -404,7 +533,9 @@ export declare class AlbexEngine {
|
|
|
404
533
|
private _indexRtf;
|
|
405
534
|
private static readonly _INDEXERS;
|
|
406
535
|
/**
|
|
407
|
-
* Index a file. Supported formats: DOCX, XLSX, PDF,
|
|
536
|
+
* Index a file. Supported formats (11, with varying depth): DOCX, XLSX, PDF,
|
|
537
|
+
* HTML, MD, JSON, CSV, EML, RTF, TXT, XML. Several are deliberately "lite"
|
|
538
|
+
* (CSV is RFC-4180-lite, EML is MIME-lite, RTF is regex-stripped).
|
|
408
539
|
* Throws for unsupported formats or parse errors.
|
|
409
540
|
*/
|
|
410
541
|
indexFile(file: File): Promise<IndexedDocument>;
|
|
@@ -432,6 +563,21 @@ export declare class AlbexEngine {
|
|
|
432
563
|
* references (e.g. in a UI) remain valid.
|
|
433
564
|
*/
|
|
434
565
|
compact(): void;
|
|
566
|
+
/**
|
|
567
|
+
* Enumerate the authoritative chunks Albex indexed for a document, in order.
|
|
568
|
+
* Lets a host mirror Albex's exact chunking — e.g. embed the same units for a
|
|
569
|
+
* parallel semantic index keyed on the same {@link AuthoritativeChunk.id}
|
|
570
|
+
* (`"<docId>::<ord>"`, identical to {@link SearchResult.chunkId}). `docId` is
|
|
571
|
+
* `IndexedDocument.docId` from {@link indexFile}; returns `[]` if no live
|
|
572
|
+
* document has that id.
|
|
573
|
+
*
|
|
574
|
+
* The returned `id`/`ord`/`sub` are stable across {@link compact} and
|
|
575
|
+
* snapshot save/load. Never key persistent structures on a search result's
|
|
576
|
+
* absolute `chunkIdx`, which {@link compact} renumbers.
|
|
577
|
+
*/
|
|
578
|
+
listChunks(docId: number): AuthoritativeChunk[];
|
|
579
|
+
/** Doc-table slot (0..getDocCount) whose stable id is `docId`, or -1. */
|
|
580
|
+
private _docSlotOf;
|
|
435
581
|
/**
|
|
436
582
|
* Search the index. Supports:
|
|
437
583
|
* - Simple queries: `contrato` (AND of tokens, accent-insensitive)
|
|
@@ -440,6 +586,11 @@ export declare class AlbexEngine {
|
|
|
440
586
|
*
|
|
441
587
|
* Pass `{ windowed: true }` to receive cropped snippets with ASCII ellipsis
|
|
442
588
|
* markers instead of full chunk text. Defaults: 60 bytes before, 120 after.
|
|
589
|
+
*
|
|
590
|
+
* Note: this synchronous path never uses the GPU pre-filter — the WebGPU
|
|
591
|
+
* scan is asynchronous by nature. Only `searchCooperative` (the budgeted
|
|
592
|
+
* path) engages the GPU; `search()` always runs the CPU Bloom pre-filter,
|
|
593
|
+
* regardless of the `gpu` option.
|
|
443
594
|
*/
|
|
444
595
|
search(query: string, opts?: SearchOptions): SearchResult[];
|
|
445
596
|
/** Read the WASM-compiled tokens of branch `i` for phrase post-filter.
|
|
@@ -481,12 +632,24 @@ export declare class AlbexEngine {
|
|
|
481
632
|
* may eat the entire budget, which is also fine.
|
|
482
633
|
*/
|
|
483
634
|
private _runSearchBudgeted;
|
|
635
|
+
/** Truncation booleans for SearchStats, decoded from the flags the WASM
|
|
636
|
+
* reported for the most recent prepareQuery (audit 1.6 — the engine used
|
|
637
|
+
* to drop OR branches past 8 and tokens past 4 in silence). */
|
|
638
|
+
private _truncStats;
|
|
484
639
|
/** Materialise results [0..count) into the public SearchResult shape.
|
|
485
640
|
* When `phraseTokens` is given, each result is kept only if those tokens
|
|
486
641
|
* appear adjacently in the FULL chunk text — independent of any display
|
|
487
|
-
* windowing — so phrase queries stay correct under `{ windowed: true }`.
|
|
642
|
+
* windowing — so phrase queries stay correct under `{ windowed: true }`.
|
|
643
|
+
*
|
|
644
|
+
* Frontier discipline (audit 2.1): all numeric fields of every result are
|
|
645
|
+
* read in ONE DataView pass over the `#[repr(C)]` RESULTS array
|
|
646
|
+
* (`getResultsPtr`/`getResultStride`, ABI 6) — the old path made 12-15
|
|
647
|
+
* frontier calls per result. Strings still need calls, minimised to one
|
|
648
|
+
* snippet read per result plus one doc-name read per DISTINCT document
|
|
649
|
+
* (the old `getResultDocName` was additionally O(doc_count) inside WASM
|
|
650
|
+
* for every single result). */
|
|
488
651
|
private _collectResults;
|
|
489
|
-
/** Run all OR branches and merge dedup-by-(
|
|
652
|
+
/** Run all OR branches and merge dedup-by-(chunkId, matchStart). The
|
|
490
653
|
* branches are already compiled inside the WASM (by prepareQuery); we
|
|
491
654
|
* iterate them with selectQueryBranch. The "rawQuery" param is kept
|
|
492
655
|
* only for the lastSearch.query field. */
|
|
@@ -495,7 +658,8 @@ export declare class AlbexEngine {
|
|
|
495
658
|
* active (set via selectQueryBranch). Returns the materialised
|
|
496
659
|
* SearchResult[]. Caller is responsible for activating a branch first. */
|
|
497
660
|
private _runSearch;
|
|
498
|
-
/** Returns current engine statistics
|
|
661
|
+
/** Returns current engine statistics (capacities are the RUNTIME values
|
|
662
|
+
* the engine was initialised with via the `capacity` option). */
|
|
499
663
|
getStats(): EngineStats;
|
|
500
664
|
/** Returns stats from the most recent search, or null. */
|
|
501
665
|
getLastSearchStats(): SearchStats | null;
|
package/dist/albex.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"albex.d.ts","sourceRoot":"","sources":["../src/albex.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;
|
|
1
|
+
{"version":3,"file":"albex.d.ts","sourceRoot":"","sources":["../src/albex.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AA2BH,OAAO,EACL,UAAU,EACV,cAAc,EACd,2BAA2B,EAC3B,eAAe,EACf,kBAAkB,GACnB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAClE,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC3D,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,YAAY,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACxE,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAwB5D;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,mBAAmB;IAClC,qEAAqE;IACrE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;+DAE2D;IAC3D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;6CAEyC;IACzC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;gEAC4D;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,OAAO,GAAG,mBAAmB,CAAC;AAElE,MAAM,WAAW,YAAY;IAC3B;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,8EAA8E;IAC9E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC;IAC7B;;;;;;OAMG;IACH,GAAG,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC;IAC5B;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAmDD,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAC;IACd,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB,6EAA6E;IAC7E,GAAG,EAAE,MAAM,CAAC;IACZ,kFAAkF;IAClF,GAAG,EAAE,MAAM,CAAC;IACZ,yDAAyD;IACzD,IAAI,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,SAAS;IACxB;;;+BAG2B;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd;+CAC2C;IAC3C,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB;yEACqE;IACrE,KAAK,EAAE,MAAM,CAAC;IACd,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB;;gFAE4E;IAC5E,OAAO,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd;;0DAEsD;IACtD,OAAO,EAAE,MAAM,CAAC;IAChB;;;;kDAI8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB;qCACiC;IACjC,OAAO,EAAE,SAAS,EAAE,CAAC;IACrB;6EACyE;IACzE,YAAY,EAAE,MAAM,CAAC;IACrB;oBACgB;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,wDAAwD;IACxD,QAAQ,EAAE,MAAM,CAAC;IACjB,wEAAwE;IACxE,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,8DAA8D;IAC9D,SAAS,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,OAAO,EAAE,MAAM,CAAC;IAChB,+CAA+C;IAC/C,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB;mDAC+C;IAC/C,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAC5B;iCAC6B;IAC7B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,6DAA6D;IAC7D,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;GAIG;AACH,MAAM,WAAW,eAAe;IAC9B;;8EAE0E;IAC1E,IAAI,EAAE,WAAW,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,CAAC;IACpD,gEAAgE;IAChE,KAAK,EAAE,KAAK,GAAG,KAAK,GAAG,KAAK,GAAG,aAAa,GAAG,SAAS,CAAC;IACzD,sDAAsD;IACtD,OAAO,EAAE,MAAM,CAAC;IAChB,0CAA0C;IAC1C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAoZD;;;;GAIG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;GAIG;AACH,MAAM,WAAW,UAAU;IACzB;;kEAE8D;IAC9D,SAAS,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAEjF;;;4BAGwB;IACxB,OAAO,CAAC,EAAE;QACR;;qEAE6D;QAC7D,2BAA2B,CAAC,EAAE,OAAO,CAAC;KACvC,CAAC;CACH;AAED;;yDAEyD;AACzD,MAAM,WAAW,SAAS;IACxB;;2BAEuB;IACvB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,qBAAa,WAAW;IAEtB,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,IAAI,CAAsB;IAElC;;;;;OAKG;IACH;;;;;OAKG;IACH,IAAI,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,kBAAkB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC,GAAG,SAAS,CAEtG;IAED;;gEAE4D;IAC5D,OAAO,CAAC,WAAW,CAA2B;IAG9C,OAAO,CAAC,QAAQ,CAAgC;IAChD,OAAO,CAAC,OAAO,CAAmC;IAElD,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,WAAW,CAA4B;IAC/C;;;sEAGkE;IAClE,OAAO,CAAC,eAAe,CAAK;IAC5B;;;4DAGwD;IACxD,OAAO,CAAC,YAAY,CAAyB;IAC7C,oEAAoE;IACpE,OAAO,CAAC,SAAS,CAAyC;IAC1D,OAAO,CAAC,KAAK,CAAkB;IAC/B,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,UAAU,CAA8B;IAChD,OAAO,CAAC,IAAI,CAAyB;IACrC;;;;;mCAK+B;IAC/B,OAAO,CAAC,eAAe,CAAQ;IAC/B,OAAO,CAAC,qBAAqB,CAA6B;IAC1D,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;IAQrC,OAAO,CAAC,QAAQ,CAAuC;IACvD,OAAO,CAAC,KAAK,CAAS;gBAEV,IAAI,GAAE,YAAiB;IAInC,oEAAoE;IACpE,OAAO,CAAC,UAAU;IAWlB;6DACyD;IACzD,OAAO,CAAC,WAAW;IAWnB;4EACwE;IACxE,OAAO,CAAC,oBAAoB;IAU5B;;;;;;;;;OASG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAkC3B;;;;;;;;;;;;;OAaG;YACW,eAAe;IAsC7B,sDAAsD;IACtD,IAAI,WAAW,IAAI,OAAO,CAAuB;IAEjD,2EAA2E;IAC3E,IAAI,UAAU,IAAI,OAAO,CAAmC;IAI5D;;;;;;;;OAQG;IACH,OAAO,CAAC,gBAAgB;IAUxB;;;;;;;;;;;;;;OAcG;YACW,aAAa;IA4C3B,OAAO,CAAC,GAAG;IAIX,OAAO,CAAC,SAAS;IAOjB,OAAO,CAAC,SAAS;IAMjB,OAAO,CAAC,QAAQ;IAKhB;;;qCAGiC;IACjC,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,SAAS;IASjB;;;;;;;OAOG;IACH,OAAO,CAAC,YAAY;IAqBpB,OAAO,CAAC,aAAa;YAWP,cAAc;YA8Bd,UAAU;YAQV,UAAU;YAoBV,SAAS;IAkHvB;;;;;;;;;;;;;;;;;;;;;;OAsBG;YACW,gBAAgB;IAe9B;;;;;;;;;;;;;;;;;;;;OAoBG;YACW,sBAAsB;IAmEpC;;;;;;;;;;;;;OAaG;YACW,sBAAsB;YA6CtB,SAAS;YAWT,SAAS;YAkBT,QAAQ;YAgCR,UAAU;YA4BV,UAAU;YA4BV,SAAS;YAuDT,SAAS;IA8BvB;;;;;;;;OAQG;IACH,OAAO,CAAC,oBAAoB;YAqEd,SAAS;IAiGvB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAc/B;IAIF;;;;;OAKG;IACG,SAAS,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,eAAe,CAAC;YAIvC,eAAe;IAwF7B;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAKnC,OAAO,CAAC,oBAAoB;IAW5B;;;;OAIG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,GAAG,OAAO,CAAC,eAAe,CAAC;IAY5E;;;;;;OAMG;IACH,OAAO,IAAI,IAAI;IASf;;;;;;;;;;;OAWG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,kBAAkB,EAAE;IAwC/C,yEAAyE;IACzE,OAAO,CAAC,UAAU;IASlB;;;;;;;;;;;;;OAaG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,YAAY,EAAE;IAuB/D;;gCAE4B;IAC5B,OAAO,CAAC,aAAa;IAOrB;;;;;;;;;;;;;OAaG;IACI,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IAO9F;kFAC8E;YAChE,yBAAyB;IAoCvC;;;;;OAKG;IACI,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IAKzF;;;;;;;;OAQG;YACW,kBAAkB;IA6FhC;;mEAE+D;IAC/D,OAAO,CAAC,WAAW;IASnB;;;;;;;;;;;mCAW+B;IAC/B,OAAO,CAAC,eAAe;IAwIvB;;;8CAG0C;IAC1C,OAAO,CAAC,SAAS;IAoBjB;;8EAE0E;IAC1E,OAAO,CAAC,UAAU;IAoBlB;qEACiE;IACjE,QAAQ,IAAI,WAAW;IAavB,0DAA0D;IAC1D,kBAAkB,IAAI,WAAW,GAAG,IAAI;IAIxC,6CAA6C;IAC7C,IAAI,SAAS,IAAI,SAAS,eAAe,EAAE,CAE1C;IAED,iCAAiC;IACjC,MAAM,KAAK,mBAAmB,IAAI,MAAM,EAAE,CAEzC;IAED,oCAAoC;IACpC,YAAY,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI;IAIzC,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAIrC,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIhC;;;;;;;;;OASG;IACH,WAAW,CAAC,IAAI,EAAE,KAAK,GAAG,IAAI,GAAG,IAAI;IAIrC,4DAA4D;IAC5D,KAAK,IAAI,IAAI;IAKb,OAAO,CAAC,WAAW;IAYnB;;;;;;;;;;;;;;;;;;OAkBG;IACH,eAAe,IAAI,eAAe,EAAE;IAMpC,oEAAoE;IACpE,OAAO,CAAC,KAAK;IAKb;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,SAAS,CAAC,OAAO,EAAE,UAAU,GAAG,SAAS;IAsBzC;;;;;;OAMG;IACG,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;YAIzB,UAAU;IAqBxB;;;;OAIG;IACG,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;YAI5B,UAAU;IA2FxB;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAQhD,8CAA8C;IACxC,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIjD,+DAA+D;IACzD,aAAa,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAIxC;;;;;;;;;;OAUG;IACH,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;CAczB"}
|