albex 0.1.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/CHANGELOG.md +416 -0
  2. package/README.md +244 -112
  3. package/dist/albex-worker.d.ts +70 -0
  4. package/dist/albex-worker.d.ts.map +1 -0
  5. package/dist/albex-worker.js +153 -0
  6. package/dist/albex-worker.js.map +1 -0
  7. package/dist/albex.d.ts +508 -6
  8. package/dist/albex.d.ts.map +1 -1
  9. package/dist/albex.js +1911 -141
  10. package/dist/albex.js.map +1 -1
  11. package/dist/errors.d.ts +52 -0
  12. package/dist/errors.d.ts.map +1 -0
  13. package/dist/errors.js +66 -0
  14. package/dist/errors.js.map +1 -0
  15. package/dist/gpu/bloom-runtime.d.ts +60 -0
  16. package/dist/gpu/bloom-runtime.d.ts.map +1 -0
  17. package/dist/gpu/bloom-runtime.js +176 -0
  18. package/dist/gpu/bloom-runtime.js.map +1 -0
  19. package/dist/gpu/bloom-shader.wgsl.d.ts +19 -0
  20. package/dist/gpu/bloom-shader.wgsl.d.ts.map +1 -0
  21. package/dist/gpu/bloom-shader.wgsl.js +49 -0
  22. package/dist/gpu/bloom-shader.wgsl.js.map +1 -0
  23. package/dist/persistence.d.ts +21 -0
  24. package/dist/persistence.d.ts.map +1 -0
  25. package/dist/persistence.js +174 -0
  26. package/dist/persistence.js.map +1 -0
  27. package/dist/pool/coordinator.d.ts +98 -0
  28. package/dist/pool/coordinator.d.ts.map +1 -0
  29. package/dist/pool/coordinator.js +247 -0
  30. package/dist/pool/coordinator.js.map +1 -0
  31. package/dist/profile.d.ts +100 -0
  32. package/dist/profile.d.ts.map +1 -0
  33. package/dist/profile.js +200 -0
  34. package/dist/profile.js.map +1 -0
  35. package/dist/resource-manager.d.ts +56 -0
  36. package/dist/resource-manager.d.ts.map +1 -0
  37. package/dist/resource-manager.js +138 -0
  38. package/dist/resource-manager.js.map +1 -0
  39. package/dist/tiered-store.d.ts +98 -0
  40. package/dist/tiered-store.d.ts.map +1 -0
  41. package/dist/tiered-store.js +238 -0
  42. package/dist/tiered-store.js.map +1 -0
  43. package/dist/wasm-bindings.d.ts +180 -0
  44. package/dist/wasm-bindings.d.ts.map +1 -0
  45. package/dist/wasm-bindings.js +128 -0
  46. package/dist/wasm-bindings.js.map +1 -0
  47. package/dist/worker-protocol.d.ts +86 -0
  48. package/dist/worker-protocol.d.ts.map +1 -0
  49. package/dist/worker-protocol.js +20 -0
  50. package/dist/worker-protocol.js.map +1 -0
  51. package/dist/worker-runtime.d.ts +14 -0
  52. package/dist/worker-runtime.d.ts.map +1 -0
  53. package/dist/worker-runtime.js +109 -0
  54. package/dist/worker-runtime.js.map +1 -0
  55. package/package.json +60 -13
  56. package/src/albex-worker.ts +187 -0
  57. package/src/albex.ts +2136 -189
  58. package/src/errors.ts +76 -0
  59. package/src/gpu/bloom-runtime.ts +229 -0
  60. package/src/gpu/bloom-shader.wgsl.ts +48 -0
  61. package/src/persistence.ts +175 -0
  62. package/src/pool/coordinator.ts +324 -0
  63. package/src/profile.ts +280 -0
  64. package/src/resource-manager.ts +167 -0
  65. package/src/tiered-store.ts +259 -0
  66. package/src/wasm-bindings.ts +349 -0
  67. package/src/worker-protocol.ts +48 -0
  68. package/src/worker-runtime.ts +106 -0
  69. package/wasm/pkg/albex_pdf.wasm +0 -0
  70. package/wasm/pkg/albex_wasm.wasm +0 -0
  71. package/wasm/pkg/albex_wasm_bg.wasm +0 -0
  72. package/wasm/pkg/albex_wasm_simd.wasm +0 -0
@@ -0,0 +1,167 @@
1
+ /**
2
+ * Resource manager — listens to environmental signals and exposes them as
3
+ * a small event API consumed by `AlbexEngine` (and `AlbexEngineWorker`).
4
+ *
5
+ * The signals tracked:
6
+ *
7
+ * - **Visibility** — `document.visibilitychange`. When the tab is hidden
8
+ * the engine should pause speculative work (background indexing,
9
+ * prefetch of optional binaries) but must still answer in-flight queries.
10
+ *
11
+ * - **Battery** — `navigator.getBattery()`. When level <20% AND not
12
+ * charging, switch to low-power mode (smaller worker pool, longer
13
+ * frame budget yields, no GPU acceleration).
14
+ *
15
+ * - **Connection** — `navigator.connection.effectiveType` + `saveData`.
16
+ * On `'slow-2g'/'2g'` or `saveData === true`, defer optional downloads
17
+ * (PDF wasm, embedding model) until the user explicitly needs them.
18
+ *
19
+ * The manager is *passive*: it does not call into the engine. Instead it
20
+ * exposes a `state` snapshot and an `on(event, callback)` subscription so
21
+ * the engine can react with its own policy. This keeps the dependency
22
+ * direction one-way and lets the engine be tested without the DOM.
23
+ */
24
+
25
/**
 * Single composite label summarising the current environment.
 * `'normal'` is the value used when no constraining signal is active.
 */
export type ResourceMode =
  | 'normal'
  | 'low-power'
  | 'background'
  | 'constrained-network';
26
+
27
/** Snapshot of the environmental signals plus the mode derived from them. */
export interface ResourceState {
  /** Whether the page is currently visible. */
  visible: boolean;
  /** Whether the device is considered to be in a low-power situation. */
  lowPower: boolean;
  /** Whether the network is considered constrained. */
  constrainedNetwork: boolean;
  /** Composite mode derived from the three flags above. */
  mode: ResourceMode;
}
34
+
35
/** Callback invoked with the new snapshot whenever the resource state changes. */
type Listener = (state: ResourceState) => void;
36
+
37
/**
 * Optional DOM-style subscription surface shared by the battery and
 * connection objects. Both members are optional because not every
 * implementation exposes them.
 */
interface SignalEventSource {
  addEventListener?: (type: string, cb: () => void) => void;
  removeEventListener?: (type: string, cb: () => void) => void;
}

/** The subset of the object resolved by `navigator.getBattery()` that is read here. */
interface BatteryLike extends SignalEventSource {
  /** Charge level; the manager compares it against `0.2`. */
  level: number;
  /** Whether the device is currently charging. */
  charging: boolean;
}

/** The subset of `navigator.connection` that is read here. */
interface ConnectionLike extends SignalEventSource {
  /** Effective connection type, e.g. `'slow-2g'` or `'2g'`. */
  effectiveType?: string;
  /** Whether the user has requested reduced data usage. */
  saveData?: boolean;
}
50
+
51
/**
 * Passive tracker for page visibility, battery and network signals.
 *
 * It never calls into a consumer: it only maintains a `state` snapshot and
 * notifies subscribers registered through `on()` when that snapshot changes.
 */
export class ResourceManager {
  private _state: ResourceState = {
    visible: true,
    lowPower: false,
    constrainedNetwork: false,
    mode: 'normal',
  };
  private _listeners = new Set<Listener>();
  private _battery: BatteryLike | null = null;
  private _connection: ConnectionLike | null = null;
  private _onVisibility = (): void => this._refresh();
  private _onBatteryChange = (): void => this._refresh();
  private _onConnChange = (): void => this._refresh();
  private _started = false;
  /**
   * Bumped by every `start()` and `stop()`. An in-flight start compares its
   * own value against the current one after awaiting, so a start that was
   * stopped (or superseded) in the meantime does not attach listeners.
   */
  private _epoch = 0;
  /** Promise of the most recent start-up; returned to every `start()` caller. */
  private _startup: Promise<void> = Promise.resolve();

  /** Latest snapshot. A new object is installed on every change. */
  get state(): ResourceState { return this._state; }

  /** Subscribe to changes. Returns an unsubscribe function. */
  on(cb: Listener): () => void {
    this._listeners.add(cb);
    return () => { this._listeners.delete(cb); };
  }

  /**
   * Start listening. Idempotent: while started, every call returns the same
   * promise, which settles once the initial signals have been read. Safe to
   * call where `document` or `navigator` do not exist.
   */
  start(): Promise<void> {
    if (!this._started) {
      this._started = true;
      this._startup = this._attach(++this._epoch);
    }
    return this._startup;
  }

  /** Tear down listeners. Also cancels a `start()` that is still in flight. */
  stop(): void {
    if (!this._started) return;
    this._started = false;
    this._epoch++;

    if (typeof document !== 'undefined' && document.removeEventListener) {
      document.removeEventListener('visibilitychange', this._onVisibility);
    }
    this._battery?.removeEventListener?.('levelchange', this._onBatteryChange);
    this._battery?.removeEventListener?.('chargingchange', this._onBatteryChange);
    this._connection?.removeEventListener?.('change', this._onConnChange);
    this._battery = null;
    this._connection = null;
  }

  /** Attach every available signal source, then publish the first snapshot. */
  private async _attach(epoch: number): Promise<void> {
    type NavigatorSignals = {
      getBattery?: () => Promise<BatteryLike>;
      connection?: ConnectionLike;
    };

    if (typeof document !== 'undefined' && document.addEventListener) {
      document.addEventListener('visibilitychange', this._onVisibility);
    }

    // `typeof` is the only safe probe: optional chaining on an undeclared
    // global still throws a ReferenceError.
    const nav = typeof navigator !== 'undefined' && navigator != null
      ? (navigator as unknown as NavigatorSignals)
      : undefined;

    try {
      const battery = await nav?.getBattery?.();
      if (battery && epoch === this._epoch) {
        this._battery = battery;
        battery.addEventListener?.('levelchange', this._onBatteryChange);
        battery.addEventListener?.('chargingchange', this._onBatteryChange);
      }
    } catch { /* Battery API unavailable or denied; tolerate */ }

    // stop() ran while the battery promise was pending: attach nothing more.
    if (epoch !== this._epoch) return;

    const conn = nav?.connection;
    if (conn) {
      this._connection = conn;
      conn.addEventListener?.('change', this._onConnChange);
    }

    this._refresh();
  }

  /** Recompute the snapshot and notify subscribers only if a field changed. */
  private _refresh(): void {
    const visible = typeof document !== 'undefined'
      ? document.visibilityState === 'visible'
      : true;

    const lowPower = !!(this._battery
      && this._battery.level < 0.2
      && this._battery.charging === false);

    const conn = this._connection;
    const constrainedNetwork = !!conn && (
      conn.saveData === true ||
      conn.effectiveType === 'slow-2g' ||
      conn.effectiveType === '2g'
    );

    // Priority when several signals are active: hidden tab, then battery, then network.
    let mode: ResourceMode = 'normal';
    if (!visible) mode = 'background';
    else if (lowPower) mode = 'low-power';
    else if (constrainedNetwork) mode = 'constrained-network';

    const next: ResourceState = { visible, lowPower, constrainedNetwork, mode };
    if (
      next.visible === this._state.visible &&
      next.lowPower === this._state.lowPower &&
      next.constrainedNetwork === this._state.constrainedNetwork &&
      next.mode === this._state.mode
    ) return;

    this._state = next;
    for (const cb of this._listeners) {
      // One faulty subscriber must not prevent the others from being notified.
      try { cb(next); } catch { /* swallow listener errors */ }
    }
  }
}
156
+
157
+ /**
158
+ * Singleton accessor. Multiple engines in the same realm share the same
159
+ * manager — there is no benefit to running the listeners more than once,
160
+ * and the signal is global to the page anyway.
161
+ */
162
let _instance: ResourceManager | null = null;

/** Return the shared manager, constructing it on the first call. */
export function getResourceManager(): ResourceManager {
  return (_instance ??= new ResourceManager());
}
@@ -0,0 +1,259 @@
1
+ /**
2
+ * Tiered storage layer for Albex.
3
+ *
4
+ * The base engine keeps every indexed document in memory inside the BSS
5
+ * arrays. That works beautifully up to the tier's capacity (4 MB to 128 MB
6
+ * of indexed text) but breaks when the user wants to search across more.
7
+ *
8
+ * `TieredStore` adds two memory tiers behind the engine:
9
+ *
10
+ * HOT — already-indexed documents living in the WASM arrays.
11
+ * WARM — original file blobs serialised in OPFS, NOT in the engine.
12
+ *
13
+ * Eviction happens when text capacity climbs above a configurable
14
+ * threshold (default 85 %). The least-recently-accessed HOT document is
15
+ * removed from the engine; its blob stays in OPFS so we can promote it
16
+ * back later without asking the user to re-pick the file.
17
+ *
18
+ * Promotion happens explicitly: callers tell the store "I want these
19
+ * names searchable again" and we re-feed them through `engine.indexFile`.
20
+ *
21
+ * **Trade-off vs storing the internal representation in OPFS:** promoting
22
+ * a doc means re-parsing it (DOCX XML decode, etc.). For typical document
23
+ * sizes this is 20-200 ms — negligible for an explicit "search across the
24
+ * archive" action. The win is huge: the persistence format is just the
25
+ * source files, so it survives engine version bumps without any migration.
26
+ */
27
+
28
+ import type { AlbexEngine, IndexedDocument } from './albex.js';
29
+
30
/** Name of the directory, under the OPFS root, that holds the persisted blobs. */
const OPFS_DIR = 'albex-tiered';
31
+
32
/** Bookkeeping record kept per known document, whichever tier it is in. */
interface TieredEntry {
  /** Document name; also the key under which the record is stored. */
  name: string;
  /** File extension recorded for the document. */
  ext: string;
  /** Timestamp (ms) of the last access, used to order eviction. */
  lastAccessedMs: number;
  /** `true` while the document is indexed in the engine. */
  hot: boolean;
  /** Size in bytes of the original blob, for capacity estimation. */
  byteSize: number;
}
41
+
42
/** Tuning knobs accepted by the `TieredStore` constructor. */
export interface TieredStoreOptions {
  /**
   * Fraction of `textCapacity` above which eviction starts, i.e. eviction
   * runs when `textUsed > evictThreshold * textCapacity`.
   * Default 0.85. Set 1.0 to disable.
   */
  evictThreshold?: number;
  /**
   * Minimum number of documents that always remain in the hot tier.
   * Default 1, which keeps the most recently added file searchable
   * without an explicit promote step.
   */
  hotFloor?: number;
}
55
+
56
/**
 * Two-tier document store layered over an `AlbexEngine`.
 *
 * HOT documents are indexed in the engine; WARM documents exist only as
 * their original blob in an OPFS directory and can be re-indexed on demand
 * with `promote()`.
 */
export class TieredStore {
  private readonly _engine: AlbexEngine;
  private _entries = new Map<string, TieredEntry>();
  private _dir: FileSystemDirectoryHandle | null = null;
  private readonly _opts: Required<TieredStoreOptions>;
  /**
   * Names of hot documents whose blob could NOT be written to OPFS. They are
   * never evicted: without a blob there is nothing to promote them back from.
   */
  private _unpersisted = new Set<string>();

  /**
   * @param engine Engine that holds the hot tier.
   * @param opts   Eviction tuning; see `TieredStoreOptions` for defaults.
   */
  constructor(engine: AlbexEngine, opts: TieredStoreOptions = {}) {
    this._engine = engine;
    this._opts = {
      evictThreshold: opts.evictThreshold ?? 0.85,
      hotFloor: opts.hotFloor ?? 1,
    };
  }

  /** Ensure the OPFS directory exists. Idempotent. Tolerated when OPFS is unavailable. */
  async init(): Promise<void> {
    try {
      const root = await navigator.storage.getDirectory();
      this._dir = await root.getDirectoryHandle(OPFS_DIR, { create: true });
    } catch {
      // No OPFS — warm tier disabled; everything stays hot.
      this._dir = null;
    }
    await this._rehydrateIndex();
  }

  /**
   * Index a file AND register it in the warm tier so it survives across
   * sessions. Equivalent to `engine.indexFile(file)` plus a best-effort
   * write of the original blob to OPFS.
   *
   * @returns The document returned by the engine.
   */
  async indexFile(file: File): Promise<IndexedDocument> {
    const doc = await this._engine.indexFile(file);

    // Store the blob under the same name `promote()` will later read it by.
    const persisted = await this._writeBlob(file, doc.name);
    if (persisted) this._unpersisted.delete(doc.name);
    else this._unpersisted.add(doc.name);

    this._entries.set(doc.name, {
      name: doc.name,
      ext: doc.ext,
      lastAccessedMs: Date.now(),
      hot: true,
      byteSize: file.size,
    });

    await this._enforceCapacity();
    return doc;
  }

  /** Touch an entry to mark it recently used (for LRU). */
  touch(name: string): void {
    const e = this._entries.get(name);
    if (e) e.lastAccessedMs = Date.now();
  }

  /**
   * Evict the least-recently-used HOT documents until `textUsed` falls to
   * or below the configured threshold. At least `hotFloor` documents stay
   * hot, and a document without a persisted blob is never evicted because
   * evicting it would lose it for good.
   */
  async _enforceCapacity(): Promise<void> {
    const stats = this._engine.getStats();
    const threshold = stats.textCapacity * this._opts.evictThreshold;
    if (stats.textUsed <= threshold) return;

    const hotEntries = [...this._entries.values()].filter(e => e.hot);
    let hotCount = hotEntries.length;
    const candidates = hotEntries
      .filter(e => !this._unpersisted.has(e.name))
      .sort((a, b) => a.lastAccessedMs - b.lastAccessedMs);

    for (const victim of candidates) {
      if (hotCount <= this._opts.hotFloor) break;
      this._engine.removeDocument(victim.name);
      victim.hot = false;
      hotCount--;
      // Reclaim storage now so subsequent index calls see real headroom.
      this._engine.compact();
      if (this._engine.getStats().textUsed <= threshold) break;
    }
  }

  /**
   * Bring a warm document back into the engine. If it is already hot, only
   * its access time is refreshed.
   *
   * @returns The resulting `IndexedDocument`, or `null` when the name is
   *          unknown or its blob cannot be read.
   */
  async promote(name: string): Promise<IndexedDocument | null> {
    const e = this._entries.get(name);
    if (!e) return null;
    if (e.hot) {
      e.lastAccessedMs = Date.now();
      return this._engine.documents.find(d => d.name === name) ?? null;
    }

    const blob = await this._readBlob(name);
    if (!blob) return null;

    // Re-create the File under the entry's name, keeping the stored MIME type.
    const file = new File([blob], name, { type: blob.type });
    const doc = await this._engine.indexFile(file);
    e.hot = true;
    e.lastAccessedMs = Date.now();
    return doc;
  }

  /**
   * Forget a document entirely: remove from engine and delete its OPFS blob.
   * Returns whether the entry existed.
   */
  async forget(name: string): Promise<boolean> {
    const had = this._entries.has(name);
    if (this._entries.get(name)?.hot) this._engine.removeDocument(name);
    this._entries.delete(name);
    this._unpersisted.delete(name);
    await this._deleteBlob(name);
    return had;
  }

  /** All known documents, hot or warm. */
  list(): { name: string; hot: boolean; byteSize: number }[] {
    return [...this._entries.values()].map(e => ({
      name: e.name, hot: e.hot, byteSize: e.byteSize,
    }));
  }

  /** Aggregate counts per tier and the summed blob size. */
  getTierStats(): { hot: number; warm: number; totalBytes: number } {
    let hot = 0, warm = 0, totalBytes = 0;
    for (const e of this._entries.values()) {
      if (e.hot) hot++; else warm++;
      totalBytes += e.byteSize;
    }
    return { hot, warm, totalBytes };
  }

  // ── OPFS plumbing ─────────────────────────────────────────────────────

  /** Replace path separators so the name is a plain OPFS file name. */
  private _safeName(name: string): string {
    return name.replace(/[/\\]/g, '_');
  }

  /**
   * Write `file` to OPFS under `name`. Best-effort: failures are logged,
   * not thrown.
   *
   * @returns `true` only when the blob was fully written and closed.
   */
  private async _writeBlob(file: File, name: string = file.name): Promise<boolean> {
    if (!this._dir) return false;
    let writable: FileSystemWritableFileStream | undefined;
    try {
      const handle = await this._dir.getFileHandle(this._safeName(name), { create: true });
      writable = await handle.createWritable();
      // The stream accepts a Blob directly; no intermediate byte copy needed.
      await writable.write(file);
      await writable.close();
      return true;
    } catch (e) {
      // Abort so a failed write does not leave the stream (and its lock) open.
      try { await writable?.abort(); } catch { /* already closed or errored */ }
      console.warn(`[albex] failed to persist blob for ${file.name}:`, e);
      return false;
    }
  }

  /** Read the blob stored for `name`, or `null` when absent or unreadable. */
  private async _readBlob(name: string): Promise<Blob | null> {
    if (!this._dir) return null;
    try {
      const handle = await this._dir.getFileHandle(this._safeName(name));
      return await handle.getFile();
    } catch {
      return null;
    }
  }

  /** Delete the blob stored for `name`; a missing blob is not an error. */
  private async _deleteBlob(name: string): Promise<void> {
    if (!this._dir) return;
    try { await this._dir.removeEntry(this._safeName(name)); } catch { /* not found */ }
  }

  /**
   * TC39 explicit-resource-management hook. Drops the in-memory index and
   * the OPFS directory handle. The underlying OPFS blobs are NOT deleted —
   * disposal only frees JS-side state. Use `forget()` per document to
   * remove persisted data.
   */
  [Symbol.dispose](): void {
    this._entries.clear();
    this._unpersisted.clear();
    this._dir = null;
  }

  /**
   * Register every file found in the OPFS directory as a WARM entry, unless
   * an entry with that name is already known.
   */
  private async _rehydrateIndex(): Promise<void> {
    if (!this._dir) return;
    try {
      // @ts-expect-error async-iterable on FileSystemDirectoryHandle
      for await (const [name, handle] of this._dir.entries()) {
        if (handle.kind !== 'file') continue;
        // Check first: avoids opening files that are already tracked.
        if (this._entries.has(name)) continue;
        const file = await handle.getFile();
        const dot = name.lastIndexOf('.');
        this._entries.set(name, {
          name,
          // A name without a dot has no extension.
          ext: dot >= 0 ? name.slice(dot + 1).toLowerCase() : '',
          lastAccessedMs: 0, // never accessed in this session yet
          hot: false,
          byteSize: file.size,
        });
      }
    } catch (e) {
      console.warn('[albex] could not rehydrate tiered index:', e);
    }
  }
}