albex 0.3.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +466 -0
- package/README.md +32 -19
- package/dist/albex-worker.d.ts +65 -2
- package/dist/albex-worker.d.ts.map +1 -1
- package/dist/albex-worker.js +97 -20
- package/dist/albex-worker.js.map +1 -1
- package/dist/albex.d.ts +359 -55
- package/dist/albex.d.ts.map +1 -1
- package/dist/albex.js +766 -312
- package/dist/albex.js.map +1 -1
- package/dist/errors.d.ts +47 -2
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +41 -3
- package/dist/errors.js.map +1 -1
- package/dist/persistence.js +1 -1
- package/dist/pool/coordinator.d.ts +14 -6
- package/dist/pool/coordinator.d.ts.map +1 -1
- package/dist/pool/coordinator.js +65 -28
- package/dist/pool/coordinator.js.map +1 -1
- package/dist/profile.d.ts +11 -6
- package/dist/profile.d.ts.map +1 -1
- package/dist/profile.js +6 -13
- package/dist/profile.js.map +1 -1
- package/dist/resource-manager.js +1 -1
- package/dist/tiered-store.js +1 -1
- package/dist/wasm-bindings.d.ts +96 -6
- package/dist/wasm-bindings.d.ts.map +1 -1
- package/dist/wasm-bindings.js +110 -7
- package/dist/wasm-bindings.js.map +1 -1
- package/dist/worker-protocol.d.ts +23 -2
- package/dist/worker-protocol.d.ts.map +1 -1
- package/dist/worker-protocol.js +1 -1
- package/dist/worker-runtime.js +27 -3
- package/dist/worker-runtime.js.map +1 -1
- package/package.json +13 -9
- package/src/albex-worker.ts +103 -18
- package/src/albex.ts +2937 -2292
- package/src/errors.ts +63 -2
- package/src/pool/coordinator.ts +61 -34
- package/src/profile.ts +11 -10
- package/src/wasm-bindings.ts +225 -10
- package/src/worker-protocol.ts +12 -2
- package/src/worker-runtime.ts +28 -3
- package/wasm/pkg/albex_pdf.wasm +0 -0
- package/wasm/pkg/albex_wasm.wasm +0 -0
- package/wasm/pkg/albex_wasm_bg.wasm +0 -0
- package/wasm/pkg/albex_wasm_simd.wasm +0 -0
- package/wasm/pkg/albex_wasm_mini.wasm +0 -0
- package/wasm/pkg/albex_wasm_mini_simd.wasm +0 -0
- package/wasm/pkg/albex_wasm_pro.wasm +0 -0
- package/wasm/pkg/albex_wasm_pro_simd.wasm +0 -0
- package/wasm/pkg/albex_wasm_std.wasm +0 -0
- package/wasm/pkg/albex_wasm_std_simd.wasm +0 -0
package/dist/wasm-bindings.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*!
|
|
2
|
-
* albex v0.
|
|
2
|
+
* albex v0.6.1
|
|
3
3
|
* Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.
|
|
4
4
|
* (c) 2026 RafaCalRob
|
|
5
5
|
* @license MIT
|
|
@@ -17,17 +17,120 @@
|
|
|
17
17
|
* in `wasm/src/lib.rs` and `pdf-wasm/src/lib.rs`.
|
|
18
18
|
*/
|
|
19
19
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
20
|
-
//
|
|
20
|
+
// Runtime validators
|
|
21
21
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
//
|
|
23
|
+
// These replace the pre-0.5.0 `as unknown as` casts. They check three
|
|
24
|
+
// things at instantiation time:
|
|
25
|
+
// 1. memory is a WebAssembly.Memory instance.
|
|
26
|
+
// 2. abiVersion() returns a number inside the supported range.
|
|
27
|
+
// 3. every required export exists and is a function.
|
|
28
|
+
//
|
|
29
|
+
// If any of these fails, the loader throws a typed error before the
|
|
30
|
+
// engine returns from init(). This eliminates the audit 3.2 issue:
|
|
31
|
+
// previously a missing export only surfaced when its call site ran.
|
|
32
|
+
/** Range of ABI versions this host code understands for the main module.
|
|
33
|
+
* Update both ends together with the Rust `abiVersion()` constant when
|
|
34
|
+
* the export surface changes. */
|
|
35
|
+
// ABI 7 adds runtime capacity (initWithCapacity, decision A16) and removes
|
|
36
|
+
// the compile-time tier system (`getTier` is gone), on top of ABI 6's batch
|
|
37
|
+
// frontier reads, ABI 5's truncation signalling and ABI 4's authoritative
|
|
38
|
+
// chunk enumeration. The required-exports list below already makes any
|
|
39
|
+
// older binary fail the missing-exports check, so a tolerant lower bound
|
|
40
|
+
// was dead code — the range is pinned to the one ABI this host actually
|
|
41
|
+
// speaks (audit 0.6.0, finding #7). The .wasm ships inside this package
|
|
42
|
+
// (files: wasm/pkg/*.wasm), so host TS and binary are always
|
|
43
|
+
// version-matched.
|
|
44
|
+
// Both bounds are inclusive: validateExports rejects a module only when
// abiVersion() is below MIN or above MAX.
const MAIN_ABI_MIN = 7;
|
|
45
|
+
const MAIN_ABI_MAX = 7;
|
|
46
|
+
/** Inclusive range of ABI versions accepted for the PDF module. */
|
|
47
|
+
const PDF_ABI_MIN = 1;
|
|
48
|
+
const PDF_ABI_MAX = 3;
|
|
49
|
+
/**
 * Names the main WASM module must export as functions.
 *
 * `validateExports` walks this list and reports every entry that is absent
 * or not callable, so adding a name here makes it mandatory; removing one
 * is a breaking ABI bump.
 */
const MAIN_REQUIRED = [
    'abiVersion',
    'getBuffer',
    'init',
    'initWithCapacity',
    'setDocumentName',
    'beginDocument',
    'feedXmlBytes',
    'endDocument',
    'beginXlsx',
    'feedXlsxBytes',
    'feedText',
    'flushParagraph',
    'setMaxErrors',
    'setThreshold',
    'setMaxResults',
    'prepareQuery',
    'getQueryKind',
    'getQueryBranchCount',
    'getQueryBranchPattern',
    'selectQueryBranch',
    'getQueryTruncationFlags',
    'setPattern',
    'search',
    'searchBegin',
    'searchSlice',
    'getSearchCursor',
    'getSearchTotal',
    'getResultCount',
    'getResultsPtr',
    'getResultStride',
    'getResultDocId',
    'getResultLocation',
    'getResultScore',
    'getResultStart',
    'getResultEnd',
    'getResultChunkIdx',
    'getResultDocName',
    'getResultMatchCount',
    'getResultMatchStartAt',
    'getResultMatchEndAt',
    'getSnippet',
    'getSnippetWindow',
    'getSnippetWindowOffset',
    'getStatBloomTested',
    'getStatBloomPassed',
    'getStatBitapMatched',
    'getChunkCount',
    'getDocCount',
    'getTextUsed',
    'getTextCapacity',
    'getLastIndexOverflow',
    'snapshotSize',
    'snapshotChunk',
    'restoreBegin',
    'restoreFeed',
    'restoreCommit',
    'getDocId',
    'getDocChunkCount',
    'getDocName',
    'isDocDeleted',
    'removeDocument',
    'compact',
    'getDocChunkBase',
    'getChunkLocationAt',
    'getChunkByteLenAt',
    'getChunkTextAt',
    'listChunksBatch',
    'setLanguage',
    'getMaxChunks',
    'getMaxDocs',
    'getNameCapacity',
    'getChunksPtr',
    'getChunkStructSize',
    'setCandidateMask',
    'clearCandidateMask',
    'getPatternBloomLo',
    'getPatternBloomHi',
    'getDocContentHashPtr',
    'getDocContentHashLen',
    'setDocumentContentHash',
    'hashBegin',
    'hashFeed',
    'hashFinish',
];
|
|
84
|
+
/** Names the PDF WASM module must export as functions (checked by `validateExports`). */
const PDF_REQUIRED = [
    'abiVersion',
    'allocInput',
    'extractPdf',
    'getPageLen',
    'getPagePtr',
    'getErrorLen',
    'getErrorPtr',
    'getPageCount',
    'extractPageImages',
    'getPageImageLen',
    'getPageImagePtr',
    'getPageImageKind',
];
|
|
90
|
+
/**
 * Error raised when an instantiated WASM module does not satisfy the ABI
 * contract expected by this host code.
 *
 * - `module`  — label of the module that failed (always set).
 * - `missing` — list of absent exports; only assigned when `opts.missing`
 *               is truthy.
 * - `version` — ABI version reported by the module; only assigned when
 *               `opts.version` is not `undefined`.
 */
export class AlbexAbiMismatchError extends Error {
    module;
    missing;
    version;
    constructor(module, message, opts) {
        super(message);
        // `opts` is optional; fall back to an empty bag so both fields read as undefined.
        const { missing, version } = opts ?? {};
        this.name = 'AlbexAbiMismatchError';
        this.module = module;
        if (missing) {
            this.missing = missing;
        }
        if (version !== undefined) {
            this.version = version;
        }
    }
}
|
|
105
|
+
function validateExports(exports, required, module, abiMin, abiMax) {
|
|
106
|
+
const mem = exports['memory'];
|
|
107
|
+
if (!(mem instanceof WebAssembly.Memory)) {
|
|
108
|
+
throw new AlbexAbiMismatchError(module, `${module}: \`memory\` is missing or not a WebAssembly.Memory instance.`);
|
|
109
|
+
}
|
|
110
|
+
const missing = [];
|
|
111
|
+
for (const name of required) {
|
|
112
|
+
if (typeof exports[name] !== 'function')
|
|
113
|
+
missing.push(name);
|
|
114
|
+
}
|
|
115
|
+
if (missing.length) {
|
|
116
|
+
throw new AlbexAbiMismatchError(module, `${module}: WASM binary missing required exports: ${missing.join(', ')}. ` +
|
|
117
|
+
`The .wasm was built with an incompatible source — rebuild with the current toolchain.`, { missing });
|
|
118
|
+
}
|
|
119
|
+
const version = exports['abiVersion']();
|
|
120
|
+
if (version < abiMin || version > abiMax) {
|
|
121
|
+
throw new AlbexAbiMismatchError(module, `${module}: abiVersion ${version} outside supported range [${abiMin}..${abiMax}]. ` +
|
|
122
|
+
`The host TypeScript expects a different binary — upgrade albex or rebuild the WASM.`, { version });
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
/** Validate `WebAssembly.Exports` against the main-module contract
|
|
126
|
+
* (MAIN_REQUIRED names, MAIN_ABI_MIN..MAIN_ABI_MAX) and return it. Throws `AlbexAbiMismatchError` if the contract is broken. */
|
|
27
127
|
export function asAlbexExports(exports) {
|
|
128
|
+
validateExports(exports, MAIN_REQUIRED, 'main', MAIN_ABI_MIN, MAIN_ABI_MAX);
|
|
28
129
|
// The same object is handed back; the call above only inspects it.
return exports;
|
|
29
130
|
}
|
|
131
|
+
/** Validate `WebAssembly.Exports` against the PDF-module contract (PDF_REQUIRED names, PDF_ABI_MIN..PDF_ABI_MAX) and return it. Throws `AlbexAbiMismatchError` if the contract is broken. */
|
|
30
132
|
export function asAlbexPdfExports(exports) {
|
|
133
|
+
validateExports(exports, PDF_REQUIRED, 'pdf', PDF_ABI_MIN, PDF_ABI_MAX);
|
|
31
134
|
// The same object is handed back; the call above only inspects it.
return exports;
|
|
32
135
|
}
|
|
33
136
|
//# sourceMappingURL=wasm-bindings.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wasm-bindings.js","sourceRoot":"","sources":["../src/wasm-bindings.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;
|
|
1
|
+
{"version":3,"file":"wasm-bindings.js","sourceRoot":"","sources":["../src/wasm-bindings.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AA6QH,gFAAgF;AAChF,qBAAqB;AACrB,gFAAgF;AAChF,EAAE;AACF,sEAAsE;AACtE,gCAAgC;AAChC,gDAAgD;AAChD,iEAAiE;AACjE,uDAAuD;AACvD,EAAE;AACF,oEAAoE;AACpE,mEAAmE;AACnE,oEAAoE;AAEpE;;iCAEiC;AACjC,2EAA2E;AAC3E,4EAA4E;AAC5E,0EAA0E;AAC1E,uEAAuE;AACvE,yEAAyE;AACzE,wEAAwE;AACxE,wEAAwE;AACxE,6DAA6D;AAC7D,mBAAmB;AACnB,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,YAAY,GAAG,CAAC,CAAC;AAEvB,gDAAgD;AAChD,MAAM,WAAW,GAAG,CAAC,CAAC;AACtB,MAAM,WAAW,GAAG,CAAC,CAAC;AAEtB;qEACqE;AACrE,MAAM,aAAa,GAAG;IACpB,YAAY,EAAE,WAAW,EAAE,MAAM,EAAE,kBAAkB;IACrD,iBAAiB,EAAE,eAAe,EAAE,cAAc,EAAE,aAAa;IACjE,WAAW,EAAE,eAAe;IAC5B,UAAU,EAAE,gBAAgB;IAC5B,cAAc,EAAE,cAAc,EAAE,eAAe;IAC/C,cAAc,EAAE,cAAc,EAAE,qBAAqB;IACrD,uBAAuB,EAAE,mBAAmB,EAAE,yBAAyB;IACvE,YAAY,EAAE,QAAQ;IACtB,aAAa,EAAE,aAAa,EAAE,iBAAiB,EAAE,gBAAgB;IACjE,gBAAgB,EAAE,eAAe,EAAE,iBAAiB;IACpD,gBAAgB,EAAE,mBAAmB,EAAE,gBAAgB;IACvD,gBAAgB,EAAE,cAAc,EAAE,mBAAmB;IACrD,kBAAkB,EAAE,qBAAqB;IACzC,uBAAuB,EAAE,qBAAqB;IAC9C,YAAY,EAAE,kBAAkB,EAAE,wBAAwB;IAC1D,oBAAoB,EAAE,oBAAoB,EAAE,qBAAqB;IACjE,eAAe,EAAE,aAAa,EAAE,aAAa,EAAE,iBAAiB;IAChE,sBAAsB;IACtB,cAAc,EAAE,eAAe;IAC/B,cAAc,EAAE,aAAa,EAAE,eAAe;IAC9C,UAAU,EAAE,kBAAkB,EAAE,YAAY,EAAE,cAAc;IAC5D,gBAAgB,EAAE,SAAS;IAC3B,iBAAiB,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,gBAAgB;IAC9E,iBAAiB;IACjB,aAAa;IACb,cAAc,EAAE,YAAY,EAAE,iBAAiB;IAC/C,cAAc,EAAE,oBAAoB;IACpC,kBAAkB,EAAE,oBAAoB;IACxC,mBAAmB,EAAE,mBAAmB;IACxC,sBAAsB,EAAE,sBAAsB,EAAE,wBAAwB;IACxE,WAAW,EAAE,UAAU,EAAE,YAAY;CAC7B,CAAC;AAEX,MAAM,YAAY,GAAG;IACnB,YAAY,EAAE,YAAY,EAAE,YAAY;IACxC,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa;IACxD,cAAc,EAAE,mBAAmB;IACnC,iBAAiB,EAAE,iBAAiB,EAAE,kBAAkB;CAChD,CAAC;AAEX,sEAAsE;AACtE,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IACrC,MAAM,CAAiB;IACvB,OAAO,CAAqB;IAC5B,OAAO,CAAU;IAC1B,YAAY,MAAsB,EAAE,OAAe,EAAE,IAAwD;QAC3G,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAC;QACpC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,IAAI,EAAE,OAAO;YAAE,IAAI,CAAC,OA
AO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC/C,IAAI,IAAI,EAAE,OAAO,KAAK,SAAS;YAAE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;IAC/D,CAAC;CACF;AAED,SAAS,eAAe,CACtB,OAA4B,EAC5B,QAA2B,EAC3B,MAAsB,EACtB,MAAc,EACd,MAAc;IAEd,MAAM,GAAG,GAAI,OAAmC,CAAC,QAAQ,CAAC,CAAC;IAC3D,IAAI,CAAC,CAAC,GAAG,YAAY,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;QACzC,MAAM,IAAI,qBAAqB,CAAC,MAAM,EAAE,GAAG,MAAM,+DAA+D,CAAC,CAAC;IACpH,CAAC;IACD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,IAAI,OAAQ,OAAmC,CAAC,IAAI,CAAC,KAAK,UAAU;YAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3F,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,IAAI,qBAAqB,CAC7B,MAAM,EACN,GAAG,MAAM,2CAA2C,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;YAC1E,uFAAuF,EACvF,EAAE,OAAO,EAAE,CACZ,CAAC;IACJ,CAAC;IACD,MAAM,OAAO,GAAK,OAAmC,CAAC,YAAY,CAAkB,EAAE,CAAC;IACvF,IAAI,OAAO,GAAG,MAAM,IAAI,OAAO,GAAG,MAAM,EAAE,CAAC;QACzC,MAAM,IAAI,qBAAqB,CAC7B,MAAM,EACN,GAAG,MAAM,gBAAgB,OAAO,6BAA6B,MAAM,KAAK,MAAM,KAAK;YACnF,qFAAqF,EACrF,EAAE,OAAO,EAAE,CACZ,CAAC;IACJ,CAAC;AACH,CAAC;AAED;0EAC0E;AAC1E,MAAM,UAAU,cAAc,CAAC,OAA4B;IACzD,eAAe,CAAC,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,YAAY,EAAE,YAAY,CAAC,CAAC;IAC5E,OAAO,OAAsC,CAAC;AAChD,CAAC;AAED,4EAA4E;AAC5E,MAAM,UAAU,iBAAiB,CAAC,OAA4B;IAC5D,eAAe,CAAC,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;IACxE,OAAO,OAAqC,CAAC;AAC/C,CAAC"}
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* `Transferable` is opt-in per op; we use it for `indexFile` to avoid
|
|
10
10
|
* copying the file bytes into the worker.
|
|
11
11
|
*/
|
|
12
|
-
import type { AlbexOptions, IndexedDocument, SearchOptions, SearchResult, EngineStats, SearchStats } from './albex.js';
|
|
12
|
+
import type { AlbexDiagnostic, AlbexOptions, AuthoritativeChunk, IndexedDocument, SearchOptions, SearchResult, EngineStats, SearchStats } from './albex.js';
|
|
13
13
|
export type WorkerOp = {
|
|
14
14
|
kind: 'init';
|
|
15
15
|
opts: AlbexOptions;
|
|
@@ -21,9 +21,23 @@ export type WorkerOp = {
|
|
|
21
21
|
kind: 'search';
|
|
22
22
|
query: string;
|
|
23
23
|
options: SearchOptions;
|
|
24
|
+
} | {
|
|
25
|
+
kind: 'listChunks';
|
|
26
|
+
docId: number;
|
|
24
27
|
} | {
|
|
25
28
|
kind: 'removeDocument';
|
|
26
29
|
id: string;
|
|
30
|
+
}
|
|
31
|
+
/** Replace doc `name` with new content. `fileName` is the replacement
|
|
32
|
+
* file's own name (may differ from `name`); the bytes travel as a
|
|
33
|
+
* transferred ArrayBuffer like `indexFile`. */
|
|
34
|
+
| {
|
|
35
|
+
kind: 'replaceDocument';
|
|
36
|
+
name: string;
|
|
37
|
+
fileName: string;
|
|
38
|
+
buffer: ArrayBuffer;
|
|
39
|
+
} | {
|
|
40
|
+
kind: 'takeDiagnostics';
|
|
27
41
|
} | {
|
|
28
42
|
kind: 'compact';
|
|
29
43
|
} | {
|
|
@@ -69,18 +83,25 @@ export type WorkerResponse = {
|
|
|
69
83
|
id: number;
|
|
70
84
|
ok: true;
|
|
71
85
|
result: unknown;
|
|
72
|
-
}
|
|
86
|
+
}
|
|
87
|
+
/** `limit`/`max` are populated for capacity errors so the rehydrated
|
|
88
|
+
* AlbexCapacityError keeps reporting the runtime limit that overflowed. */
|
|
89
|
+
| {
|
|
73
90
|
id: number;
|
|
74
91
|
ok: false;
|
|
75
92
|
error: {
|
|
76
93
|
name: string;
|
|
77
94
|
kind?: string;
|
|
78
95
|
message: string;
|
|
96
|
+
limit?: string;
|
|
97
|
+
max?: number;
|
|
79
98
|
};
|
|
80
99
|
};
|
|
81
100
|
export type IndexFileResult = IndexedDocument;
|
|
82
101
|
export type SearchResultArr = SearchResult[];
|
|
102
|
+
export type ChunksResult = AuthoritativeChunk[];
|
|
83
103
|
export type StatsResult = EngineStats;
|
|
84
104
|
export type SearchStatsRes = SearchStats | null;
|
|
85
105
|
export type DocsResult = readonly IndexedDocument[];
|
|
106
|
+
export type DiagnosticsRes = AlbexDiagnostic[];
|
|
86
107
|
//# sourceMappingURL=worker-protocol.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-protocol.d.ts","sourceRoot":"","sources":["../src/worker-protocol.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,aAAa,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"worker-protocol.d.ts","sourceRoot":"","sources":["../src/worker-protocol.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,kBAAkB,EAAE,eAAe,EAAE,aAAa,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE5J,MAAM,MAAM,QAAQ,GAChB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAY,IAAI,EAAE,YAAY,CAAA;CAAE,GAC/C;IAAE,IAAI,EAAE,WAAW,CAAC;IAAQ,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAA;CAAE,GAC/D;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAW,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,aAAa,CAAA;CAAE,GACnE;IAAE,IAAI,EAAE,YAAY,CAAC;IAAO,KAAK,EAAE,MAAM,CAAA;CAAE,GAC3C;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAG,EAAE,EAAE,MAAM,CAAA;CAAE;AAC1C;;+CAE+C;GAC7C;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAA;CAAE,GACjF;IAAE,IAAI,EAAE,iBAAiB,CAAA;CAAE,GAC3B;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,GACnB;IAAE,IAAI,EAAE,OAAO,CAAA;CAAE,GACjB;IAAE,IAAI,EAAE,UAAU,CAAA;CAAE,GACpB;IAAE,IAAI,EAAE,oBAAoB,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,cAAc,CAAA;CAAE,GACxB;IAAE,IAAI,EAAE,cAAc,CAAC;IAAK,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,cAAc,CAAC;IAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACvC;IAAE,IAAI,EAAE,eAAe,CAAC;IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,GACvC;IAAE,IAAI,EAAE,aAAa,CAAC;IAAM,IAAI,EAAE,KAAK,GAAG,IAAI,CAAA;CAAE,GAChD;IAAE,IAAI,EAAE,MAAM,CAAC;IAAa,IAAI,EAAE,MAAM,CAAA;CAAE,GAC1C;IAAE,IAAI,EAAE,MAAM,CAAC;IAAa,IAAI,EAAE,MAAM,CAAA;CAAE,GAC1C;IAAE,IAAI,EAAE,YAAY,CAAC;IAAO,IAAI,EAAE,MAAM,CAAA;CAAE,GAC1C;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAG,IAAI,EAAE,MAAM,CAAA;CAAE,GAC1C;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,CAAC;AAE9B,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,EAAE,EAAE,QAAQ,CAAC;CACd;AAED,MAAM,MAAM,cAAc,GACtB;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,IAAI,CAAC;IAAE,MAAM,EAAE,OAAO,CAAA;CAAE;AAC5C;2EAC2E;GACzE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,KAAK,CAAC;IAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,CAAC;AAErH,MAAM,MAAM,eAA
e,GAAG,eAAe,CAAC;AAC9C,MAAM,MAAM,eAAe,GAAG,YAAY,EAAE,CAAC;AAC7C,MAAM,MAAM,YAAY,GAAM,kBAAkB,EAAE,CAAC;AACnD,MAAM,MAAM,WAAW,GAAO,WAAW,CAAC;AAC1C,MAAM,MAAM,cAAc,GAAI,WAAW,GAAG,IAAI,CAAC;AACjD,MAAM,MAAM,UAAU,GAAQ,SAAS,eAAe,EAAE,CAAC;AACzD,MAAM,MAAM,cAAc,GAAI,eAAe,EAAE,CAAC"}
|
package/dist/worker-protocol.js
CHANGED
package/dist/worker-runtime.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*!
|
|
2
|
-
* albex v0.
|
|
2
|
+
* albex v0.6.1
|
|
3
3
|
* Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.
|
|
4
4
|
* (c) 2026 RafaCalRob
|
|
5
5
|
* @license MIT
|
|
@@ -39,8 +39,18 @@ async function dispatch(op) {
|
|
|
39
39
|
}
|
|
40
40
|
case 'search':
|
|
41
41
|
return ensureEngine().search(op.query, op.options);
|
|
42
|
+
case 'listChunks':
|
|
43
|
+
return ensureEngine().listChunks(op.docId);
|
|
42
44
|
case 'removeDocument':
|
|
43
45
|
return ensureEngine().removeDocument(op.id);
|
|
46
|
+
case 'replaceDocument': {
|
|
47
|
+
// Same File-like wrapping as indexFile; the engine's replaceDocument
|
|
48
|
+
// handles remove + re-index + auto-compact under its own lock.
|
|
49
|
+
const file = new File([op.buffer], op.fileName);
|
|
50
|
+
return ensureEngine().replaceDocument(op.name, file);
|
|
51
|
+
}
|
|
52
|
+
case 'takeDiagnostics':
|
|
53
|
+
return ensureEngine().takeDiagnostics();
|
|
44
54
|
case 'compact':
|
|
45
55
|
ensureEngine().compact();
|
|
46
56
|
return undefined;
|
|
@@ -77,8 +87,8 @@ async function dispatch(op) {
|
|
|
77
87
|
return ensureEngine().listSnapshots();
|
|
78
88
|
}
|
|
79
89
|
}
|
|
80
|
-
|
|
81
|
-
const { id, op } =
|
|
90
|
+
async function handle(req) {
|
|
91
|
+
const { id, op } = req;
|
|
82
92
|
try {
|
|
83
93
|
const result = await dispatch(op);
|
|
84
94
|
const res = { id, ok: true, result };
|
|
@@ -92,9 +102,23 @@ self.onmessage = async (ev) => {
|
|
|
92
102
|
name: e.name ?? 'Error',
|
|
93
103
|
kind: err instanceof AlbexError ? err.kind : undefined,
|
|
94
104
|
message: e.message ?? String(err),
|
|
105
|
+
// Capacity metadata (which pool + its runtime limit) — plain data,
|
|
106
|
+
// survives structuredClone, lets the main side rehydrate a full
|
|
107
|
+
// AlbexCapacityError.
|
|
108
|
+
limit: typeof e.limit === 'string' ? e.limit : undefined,
|
|
109
|
+
max: typeof e.max === 'number' ? e.max : undefined,
|
|
95
110
|
},
|
|
96
111
|
};
|
|
97
112
|
self.postMessage(res);
|
|
98
113
|
}
|
|
114
|
+
}
|
|
115
|
+
// Process messages strictly in arrival order. The engine guards its own
// state, but a sync `search` arriving mid-`indexFile` await would otherwise
// be rejected as "busy"; queueing keeps the worker's externally-observable
// behaviour serial and matches the main-thread engine's serialization.
//
// The chain must never settle as rejected: `.then(cb)` on a rejected promise
// skips `cb`, so a single failure escaping `handle` (it destructures the
// request before its try block, and posts the error reply from inside its
// catch) would stop every later message from being processed. The trailing
// `.catch` logs the failure and restores a resolved chain.
let _queue = Promise.resolve();
self.onmessage = (ev) => {
    const req = ev.data;
    _queue = _queue
        .then(() => handle(req))
        .catch((err) => {
            console.error('albex worker: request handler failed', err);
        });
};
|
|
100
124
|
//# sourceMappingURL=worker-runtime.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-runtime.js","sourceRoot":"","sources":["../src/worker-runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAGrD,IAAI,MAAM,GAAuB,IAAI,CAAC;AAEtC,SAAS,YAAY;IACnB,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IACtE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,EAAY;IAClC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;QAChB,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,GAAG,IAAI,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YACpB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,gEAAgE;YAChE,2BAA2B;YAC3B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC5C,OAAO,YAAY,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;QACD,KAAK,QAAQ;YACX,OAAO,YAAY,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC;QACrD,KAAK,gBAAgB;YACnB,OAAO,YAAY,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,KAAK,SAAS;YACZ,YAAY,EAAE,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,SAAS,CAAC;QACnB,KAAK,OAAO;YACV,YAAY,EAAE,CAAC,KAAK,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,KAAK,UAAU;YACb,OAAO,YAAY,EAAE,CAAC,QAAQ,EAAE,CAAC;QACnC,KAAK,oBAAoB;YACvB,OAAO,YAAY,EAAE,CAAC,kBAAkB,EAAE,CAAC;QAC7C,KAAK,cAAc;YACjB,OAAO,YAAY,EAAE,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,KAAK,cAAc;YACjB,YAAY,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAClC,OAAO,SAAS,CAAC;QACnB,KAAK,cAAc;YACjB,YAAY,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAClC,OAAO,SAAS,CAAC;QACnB,KAAK,eAAe;YAClB,YAAY,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,OAAO,SAAS,CAAC;QACnB,KAAK,aAAa;YAChB,YAAY,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YACpC,OAAO,SAAS,CAAC;QACnB,KAAK,MAAM;YACT,OAAO,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QACtC,KAAK,MAAM;YACT,OAAO,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QACtC,KAAK,YAAY;YACf,OAAO,YAAY,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5C,KAAK,gBAAgB;YACnB,OAAO,YAAY,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAChD,KAAK,eAAe;YAClB,OAAO,YAAY,EAAE,CAAC,aAAa,EAAE,CAAC;IA
C1C,CAAC;AACH,CAAC;AAED,
|
|
1
|
+
{"version":3,"file":"worker-runtime.js","sourceRoot":"","sources":["../src/worker-runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAGrD,IAAI,MAAM,GAAuB,IAAI,CAAC;AAEtC,SAAS,YAAY;IACnB,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IACtE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,EAAY;IAClC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;QAChB,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,GAAG,IAAI,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YACpB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,gEAAgE;YAChE,2BAA2B;YAC3B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC5C,OAAO,YAAY,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;QACD,KAAK,QAAQ;YACX,OAAO,YAAY,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC;QACrD,KAAK,YAAY;YACf,OAAO,YAAY,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;QAC7C,KAAK,gBAAgB;YACnB,OAAO,YAAY,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,KAAK,iBAAiB,CAAC,CAAC,CAAC;YACvB,qEAAqE;YACrE,+DAA+D;YAC/D,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC;YAChD,OAAO,YAAY,EAAE,CAAC,eAAe,CAAC,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QACvD,CAAC;QACD,KAAK,iBAAiB;YACpB,OAAO,YAAY,EAAE,CAAC,eAAe,EAAE,CAAC;QAC1C,KAAK,SAAS;YACZ,YAAY,EAAE,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,SAAS,CAAC;QACnB,KAAK,OAAO;YACV,YAAY,EAAE,CAAC,KAAK,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,KAAK,UAAU;YACb,OAAO,YAAY,EAAE,CAAC,QAAQ,EAAE,CAAC;QACnC,KAAK,oBAAoB;YACvB,OAAO,YAAY,EAAE,CAAC,kBAAkB,EAAE,CAAC;QAC7C,KAAK,cAAc;YACjB,OAAO,YAAY,EAAE,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1C,KAAK,cAAc;YACjB,YAAY,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAClC,OAAO,SAAS,CAAC;QACnB,KAAK,cAAc;YACjB,YAAY,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAClC,OAAO,SAAS,CAAC;QACnB,KAAK,eAAe;YAClB,YAAY,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,OAAO,SAAS,CAAC;QACnB,KAAK,aAAa;YAChB,YAAY,EAAE,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YACpC,OAAO,SAAS,CAAC;
QACnB,KAAK,MAAM;YACT,OAAO,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QACtC,KAAK,MAAM;YACT,OAAO,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QACtC,KAAK,YAAY;YACf,OAAO,YAAY,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5C,KAAK,gBAAgB;YACnB,OAAO,YAAY,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAChD,KAAK,eAAe;YAClB,OAAO,YAAY,EAAE,CAAC,aAAa,EAAE,CAAC;IAC1C,CAAC;AACH,CAAC;AAED,KAAK,UAAU,MAAM,CAAC,GAAkB;IACtC,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,GAAG,CAAC;IACvB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,EAAE,CAAC,CAAC;QAClC,MAAM,GAAG,GAAmB,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QACpD,IAA0B,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,GAAG,GAA8D,CAAC;QACzE,MAAM,GAAG,GAAmB;YAC1B,EAAE,EAAE,EAAE,EAAE,KAAK;YACb,KAAK,EAAE;gBACL,IAAI,EAAK,CAAC,CAAC,IAAI,IAAI,OAAO;gBAC1B,IAAI,EAAK,GAAG,YAAY,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;gBACzD,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;gBACjC,mEAAmE;gBACnE,gEAAgE;gBAChE,sBAAsB;gBACtB,KAAK,EAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBAC1D,GAAG,EAAM,OAAO,CAAC,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS;aACvD;SACF,CAAC;QACD,IAA0B,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC;AACH,CAAC;AAED,wEAAwE;AACxE,4EAA4E;AAC5E,2EAA2E;AAC3E,uEAAuE;AACvE,IAAI,MAAM,GAAkB,OAAO,CAAC,OAAO,EAAE,CAAC;AAC9C,IAAI,CAAC,SAAS,GAAG,CAAC,EAA+B,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC;IACpB,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1C,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "albex",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/albex.js",
|
|
@@ -42,18 +42,21 @@
|
|
|
42
42
|
],
|
|
43
43
|
"scripts": {
|
|
44
44
|
"build": "tsc && node scripts/banner.mjs",
|
|
45
|
-
"prepublishOnly": "npm run build",
|
|
46
|
-
"build:wasm": "node scripts/build-wasm.mjs
|
|
47
|
-
"build:wasm:
|
|
48
|
-
"build:wasm
|
|
49
|
-
"build:
|
|
50
|
-
"build:pdf-wasm": "cd pdf-wasm && cargo build --target wasm32-unknown-unknown --release && cp ../target/wasm32-unknown-unknown/release/albex_pdf.wasm ../wasm/pkg/albex_pdf.wasm && node scripts/wasm-banner.mjs",
|
|
51
|
-
"build:all": "npm run build:wasm:tiers && npm run build:pdf-wasm && npm run build",
|
|
45
|
+
"prepublishOnly": "npm run build:all && npm test",
|
|
46
|
+
"build:wasm": "node scripts/build-wasm.mjs && node scripts/wasm-banner.mjs",
|
|
47
|
+
"build:wasm:baseline": "node scripts/build-wasm.mjs --no-simd && node scripts/wasm-banner.mjs",
|
|
48
|
+
"build:pdf-wasm": "node scripts/build-pdf-wasm.mjs && node scripts/wasm-banner.mjs",
|
|
49
|
+
"build:all": "npm run build:wasm && npm run build:pdf-wasm && npm run build",
|
|
52
50
|
"typecheck": "tsc --noEmit",
|
|
53
51
|
"test": "vitest run",
|
|
54
52
|
"test:watch": "vitest",
|
|
55
53
|
"bench": "vitest bench --run",
|
|
56
|
-
"
|
|
54
|
+
"test:e2e": "playwright test",
|
|
55
|
+
"build:ocr": "npm --prefix packages/ocr run build",
|
|
56
|
+
"clean": "node scripts/clean.mjs",
|
|
57
|
+
"clean:all": "node scripts/clean.mjs --all",
|
|
58
|
+
"serve": "npx --yes serve@14 -p 5173 -L .",
|
|
59
|
+
"relaunch": "npm run clean && npm run build:all && npm run build:ocr && npm test && npm pack && node scripts/relaunch-banner.mjs && npm run serve"
|
|
57
60
|
},
|
|
58
61
|
"repository": {
|
|
59
62
|
"type": "git",
|
|
@@ -94,6 +97,7 @@
|
|
|
94
97
|
"node": ">=18"
|
|
95
98
|
},
|
|
96
99
|
"devDependencies": {
|
|
100
|
+
"@playwright/test": "^1.60.0",
|
|
97
101
|
"typescript": "^5.4.0",
|
|
98
102
|
"vitest": "^2.0.0"
|
|
99
103
|
}
|
package/src/albex-worker.ts
CHANGED
|
@@ -20,10 +20,28 @@
|
|
|
20
20
|
* call is in flight at a time. This matches the actual `static mut` model
|
|
21
21
|
* inside the .wasm and is fine for an interactive search UI (each keystroke
|
|
22
22
|
* replaces the previous query).
|
|
23
|
+
*
|
|
24
|
+
* ## OCR is NOT available in the worker
|
|
25
|
+
*
|
|
26
|
+
* `AlbexEngineWorker` has no `attachOcr`: an OCR adapter is an object with
|
|
27
|
+
* functions, and functions cannot cross the `postMessage` boundary (the
|
|
28
|
+
* structured-clone algorithm rejects them). Consequences:
|
|
29
|
+
*
|
|
30
|
+
* - **Scanned (image-only) PDFs index with 0 chunks, silently.** The
|
|
31
|
+
* engine records a diagnostic explaining why — read it with
|
|
32
|
+
* {@link takeDiagnostics} after `indexFile`.
|
|
33
|
+
* - If your corpus contains scanned PDFs and you need their text, index
|
|
34
|
+
* them with the synchronous main-thread `AlbexEngine` plus the OCR
|
|
35
|
+
* adapter (`engine.attachOcr(...)` / `@albex/ocr`'s `enableOcr`), then
|
|
36
|
+
* `save()` the snapshot and `load()` it from the worker engine.
|
|
37
|
+
* - A future protocol iteration could proxy OCR over a dedicated
|
|
38
|
+
* `MessagePort`; until then the main-thread engine is the OCR path.
|
|
23
39
|
*/
|
|
24
40
|
|
|
25
41
|
import type {
|
|
42
|
+
AlbexDiagnostic,
|
|
26
43
|
AlbexOptions,
|
|
44
|
+
AuthoritativeChunk,
|
|
27
45
|
IndexedDocument,
|
|
28
46
|
SearchOptions,
|
|
29
47
|
SearchResult,
|
|
@@ -41,6 +59,7 @@ import {
|
|
|
41
59
|
AlbexUnsupportedFormatError,
|
|
42
60
|
AlbexParseError,
|
|
43
61
|
AlbexCapacityError,
|
|
62
|
+
assertFileSizeWithinLimit,
|
|
44
63
|
} from './errors.js';
|
|
45
64
|
|
|
46
65
|
export interface AlbexWorkerOptions extends AlbexOptions {
|
|
@@ -60,12 +79,32 @@ export class AlbexEngineWorker {
|
|
|
60
79
|
private _worker!: Worker;
|
|
61
80
|
private _nextId = 1;
|
|
62
81
|
private _pending = new Map<number, Pending>();
|
|
63
|
-
private _docsCache: IndexedDocument[] = [];
|
|
64
82
|
|
|
65
83
|
/** Stores the options only; the Worker itself is created later, in `init()`. */
constructor(opts: AlbexWorkerOptions) {
|
|
66
84
|
this._opts = opts;
|
|
67
85
|
}
|
|
68
86
|
|
|
87
|
+
/**
|
|
88
|
+
* Spawn the worker and initialise the engine inside it.
|
|
89
|
+
*
|
|
90
|
+
* Every serializable engine option is forwarded across the worker
|
|
91
|
+
* boundary (`wasmUrl`, `wasmBaseUrl`, `pdfWasmUrl`, `capacity`, `simd`,
|
|
92
|
+
* `gpu`, `gpuThreshold`, `maxFileBytes`) — only `workerUrl`, which is
|
|
93
|
+
* consumed on this side, is stripped. Notes on what applies in a worker:
|
|
94
|
+
*
|
|
95
|
+
* - `capacity`: fully honoured — both the `'std'`/`'large'` presets
|
|
96
|
+
* (plain strings) and a custom object are structured-clone-safe, so
|
|
97
|
+
* the worker-side engine sizes its pools exactly like a main-thread
|
|
98
|
+
* engine would. Mind that the memory cost (`'large'` ≈ 180 MB) lives in
|
|
99
|
+
* the worker's heap.
|
|
100
|
+
* - `wasmBaseUrl` + `simd`: fully honoured — the worker-side engine can
|
|
101
|
+
* load the `_simd.wasm` variant.
|
|
102
|
+
* - `gpu` / `gpuThreshold`: honoured where the worker runtime exposes
|
|
103
|
+
* WebGPU. `navigator.gpu` is available in dedicated workers in
|
|
104
|
+
* Chromium-based browsers (compute needs no canvas); elsewhere the
|
|
105
|
+
* engine's GPU probe fails gracefully and searches use the CPU
|
|
106
|
+
* pre-filter, exactly as on the main thread.
|
|
107
|
+
*/
|
|
69
108
|
async init(): Promise<void> {
|
|
70
109
|
this._worker = new Worker(this._opts.workerUrl, { type: 'module' });
|
|
71
110
|
this._worker.onmessage = (ev: MessageEvent<WorkerResponse>) => {
|
|
@@ -82,10 +121,16 @@ export class AlbexEngineWorker {
|
|
|
82
121
|
for (const [, p] of this._pending) p.reject(err);
|
|
83
122
|
this._pending.clear();
|
|
84
123
|
};
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
124
|
+
// Forward every serializable engine option. AlbexOptions is data-only
|
|
125
|
+
// (strings/numbers/booleans), but filter defensively so a future
|
|
126
|
+
// non-clonable option (function, DOM handle) cannot break postMessage.
|
|
127
|
+
const opts: AlbexOptions = {};
|
|
128
|
+
for (const [k, v] of Object.entries(this._opts)) {
|
|
129
|
+
if (k === 'workerUrl') continue; // consumed on this side
|
|
130
|
+
if (v === undefined || typeof v === 'function') continue;
|
|
131
|
+
(opts as Record<string, unknown>)[k] = v;
|
|
132
|
+
}
|
|
133
|
+
await this._send({ kind: 'init', opts });
|
|
89
134
|
}
|
|
90
135
|
|
|
91
136
|
private _send<T = unknown>(op: WorkerOp, transfer: Transferable[] = []): Promise<T> {
|
|
@@ -98,20 +143,31 @@ export class AlbexEngineWorker {
|
|
|
98
143
|
}
|
|
99
144
|
|
|
100
145
|
async indexFile(file: File): Promise<IndexedDocument> {
|
|
146
|
+
// Size guard BEFORE reading: the worker-side engine enforces the same
|
|
147
|
+
// limit, but checking here avoids buffering an oversized file on the
|
|
148
|
+
// main thread just to have the worker reject it.
|
|
149
|
+
assertFileSizeWithinLimit(file, this._opts.maxFileBytes);
|
|
101
150
|
const buffer = await file.arrayBuffer();
|
|
102
151
|
// Transfer the buffer to avoid a copy.
|
|
103
|
-
|
|
152
|
+
return this._send<IndexedDocument>(
|
|
104
153
|
{ kind: 'indexFile', name: file.name, buffer },
|
|
105
154
|
[buffer],
|
|
106
155
|
);
|
|
107
|
-
this._docsCache.push(doc);
|
|
108
|
-
return doc;
|
|
109
156
|
}
|
|
110
157
|
|
|
111
158
|
search(query: string, opts: SearchOptions = {}): Promise<SearchResult[]> {
|
|
112
159
|
return this._send<SearchResult[]>({ kind: 'search', query, options: opts });
|
|
113
160
|
}
|
|
114
161
|
|
|
162
|
+
/**
|
|
163
|
+
* Enumerate the authoritative chunks Albex indexed for `docId`
|
|
164
|
+
* (`IndexedDocument.docId` from {@link indexFile}). Mirrors
|
|
165
|
+
* `AlbexEngine.listChunks` across the worker boundary.
|
|
166
|
+
*/
|
|
167
|
+
listChunks(docId: number): Promise<AuthoritativeChunk[]> {
|
|
168
|
+
return this._send<AuthoritativeChunk[]>({ kind: 'listChunks', docId });
|
|
169
|
+
}
|
|
170
|
+
|
|
115
171
|
/**
|
|
116
172
|
* Cooperative variant of `search`. Today the wire still sends a single
|
|
117
173
|
* batch — the result array is fetched in one round-trip from the worker
|
|
@@ -137,17 +193,44 @@ export class AlbexEngineWorker {
|
|
|
137
193
|
}
|
|
138
194
|
|
|
139
195
|
async removeDocument(id: string): Promise<boolean> {
|
|
140
|
-
|
|
141
|
-
if (ok) this._docsCache = this._docsCache.filter(d => d.name !== id && d.contentHash !== id);
|
|
142
|
-
return ok;
|
|
196
|
+
return this._send<boolean>({ kind: 'removeDocument', id });
|
|
143
197
|
}
|
|
144
198
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
199
|
+
/**
|
|
200
|
+
* Replace a previously indexed document with new content. Mirrors
|
|
201
|
+
* `AlbexEngine.replaceDocument`: equivalent to `removeDocument(name)` +
|
|
202
|
+
* `indexFile(newFile)` without tripping the idempotency check, plus an
|
|
203
|
+
* opportunistic compact under text-pool pressure — all inside the
|
|
204
|
+
* worker-side engine's lock. The file bytes are transferred (zero-copy),
|
|
205
|
+
* like `indexFile`.
|
|
206
|
+
*/
|
|
207
|
+
async replaceDocument(name: string, newFile: File): Promise<IndexedDocument> {
|
|
208
|
+
assertFileSizeWithinLimit(newFile, this._opts.maxFileBytes);
|
|
209
|
+
const buffer = await newFile.arrayBuffer();
|
|
210
|
+
return this._send<IndexedDocument>(
|
|
211
|
+
{ kind: 'replaceDocument', name, fileName: newFile.name, buffer },
|
|
212
|
+
[buffer],
|
|
213
|
+
);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* Drain and return the diagnostics collected by the worker-side engine
|
|
218
|
+
* since the last call. Mirrors `AlbexEngine.takeDiagnostics` — consult it
|
|
219
|
+
* after `indexFile`/`load` to surface recoverable issues (PDF fallbacks,
|
|
220
|
+
* skipped content, persistence warnings). The worker-side buffer is
|
|
221
|
+
* cleared on each call.
|
|
222
|
+
*
|
|
223
|
+
* Particularly important in a worker: scanned PDFs index with **0 chunks**
|
|
224
|
+
* (no OCR available — see the note on OCR below), and the diagnostic
|
|
225
|
+
* explaining why is only visible through this method.
|
|
226
|
+
*/
|
|
227
|
+
takeDiagnostics(): Promise<AlbexDiagnostic[]> {
|
|
228
|
+
return this._send<AlbexDiagnostic[]>({ kind: 'takeDiagnostics' });
|
|
149
229
|
}
|
|
150
230
|
|
|
231
|
+
async compact(): Promise<void> { await this._send({ kind: 'compact' }); }
|
|
232
|
+
async reset(): Promise<void> { await this._send({ kind: 'reset' }); }
|
|
233
|
+
|
|
151
234
|
getStats(): Promise<EngineStats> { return this._send({ kind: 'getStats' }); }
|
|
152
235
|
getLastSearchStats(): Promise<SearchStats | null> { return this._send({ kind: 'getLastSearchStats' }); }
|
|
153
236
|
getDocuments(): Promise<readonly IndexedDocument[]> { return this._send({ kind: 'getDocuments' }); }
|
|
@@ -168,16 +251,18 @@ export class AlbexEngineWorker {
|
|
|
168
251
|
for (const [, p] of this._pending) p.reject(new AlbexError('disposed', 'Engine disposed'));
|
|
169
252
|
this._pending.clear();
|
|
170
253
|
this._worker?.terminate();
|
|
171
|
-
this._docsCache = [];
|
|
172
254
|
}
|
|
173
255
|
}
|
|
174
256
|
|
|
175
|
-
function rehydrateError(e: { name: string; kind?: string; message: string }): Error {
|
|
257
|
+
function rehydrateError(e: { name: string; kind?: string; message: string; limit?: string; max?: number }): Error {
|
|
176
258
|
switch (e.kind) {
|
|
177
259
|
case 'init': return new AlbexInitError(e.message);
|
|
178
260
|
case 'unsupported_format': return new AlbexUnsupportedFormatError(e.message.replace(/^Unsupported format: \./, ''));
|
|
179
261
|
case 'parse': return new AlbexParseError('unknown', e.message);
|
|
180
|
-
|
|
262
|
+
// `limit`/`max` survive the wire (worker-runtime serialises them) so
|
|
263
|
+
// the rehydrated error still reports the runtime capacity that
|
|
264
|
+
// overflowed inside the worker-side engine.
|
|
265
|
+
case 'capacity': return new AlbexCapacityError(e.message, e.limit as never, e.max);
|
|
181
266
|
default: {
|
|
182
267
|
const err = new Error(e.message);
|
|
183
268
|
err.name = e.name;
|