@totalreclaw/totalreclaw 1.6.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/digest-sync.ts ADDED
@@ -0,0 +1,516 @@
+ /**
+ * TotalReclaw Plugin — digest read path (Stage 3b).
+ *
+ * Loads the latest digest claim from the subgraph, checks staleness, and
+ * returns the pre-compiled `promptText` for injection into before_agent_start.
+ * Triggers background recompilation (non-blocking) when the digest is stale
+ * and the guard conditions (>=10 new claims OR >=24h) are met.
+ *
+ * The digest is stored on-chain as a regular encrypted fact where the
+ * decrypted content is a canonical Claim with category="dig" and a
+ * distinctive blind-index marker `DIGEST_TRAPDOOR`.
+ */
+
+ import { createRequire } from 'node:module';
+ import {
+ DIGEST_CLAIM_CAP,
+ DIGEST_TRAPDOOR,
+ buildDigestClaim,
+ extractDigestFromClaim,
+ hoursSince,
+ isDigestStale,
+ shouldRecompile,
+ type DigestMode,
+ } from './claims-helper.js';
+
+ const requireWasm = createRequire(import.meta.url);
+ let _wasm: typeof import('@totalreclaw/core') | null = null;
+ function getWasm() {
+ if (!_wasm) _wasm = requireWasm('@totalreclaw/core');
+ return _wasm!;
+ }
+
+ // ---------------------------------------------------------------------------
+ // Types
+ // ---------------------------------------------------------------------------
+
+ export interface DigestLogger {
+ info: (msg: string) => void;
+ warn: (msg: string) => void;
+ }
+
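For callers that just want console output, a minimal `DigestLogger` adapter might look like the sketch below; routing both levels to stderr mirrors the `console.error` convention used in embedding.ts and is otherwise an assumption.

import type { DigestLogger } from './digest-sync.js';

// Hypothetical adapter: send both levels to stderr so stdout stays clean.
export const consoleDigestLogger: DigestLogger = {
  info: (msg) => console.error(`[TotalReclaw] ${msg}`),
  warn: (msg) => console.error(`[TotalReclaw] WARN: ${msg}`),
};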
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ export type ParsedDigest = any;
+
+ export interface LoadedDigest {
+ digest: ParsedDigest;
+ claimId: string;
+ createdAt: number;
+ }
+
+ export interface EvaluateDigestStateInput {
+ digestVersion: number;
+ currentMaxCreatedAt: number;
+ countNewClaims: number;
+ hoursSinceCompilation: number;
+ }
+
+ export interface DigestState {
+ stale: boolean;
+ recompile: boolean;
+ }
+
+ export interface CompileDigestCoreInput {
+ claimsJson: string;
+ nowUnixSeconds: number;
+ mode: DigestMode;
+ /**
+ * Optional LLM invocation. Receives the compiled prompt and must return
+ * the raw model output. Throw on HTTP error or timeout — compileDigestCore
+ * catches and falls back to the template path.
+ */
+ llmFn: ((prompt: string) => Promise<string>) | null;
+ logger: DigestLogger;
+ }
+
+ // ---------------------------------------------------------------------------
+ // Recompile-in-progress guard (in-memory, per-process)
+ // ---------------------------------------------------------------------------
+
+ let _recompileInProgress = false;
+
+ /** Is a digest recompilation currently running for this process? */
+ export function isRecompileInProgress(): boolean {
+ return _recompileInProgress;
+ }
+
+ /** Attempt to acquire the recompile lock. Returns true on success. */
+ export function tryBeginRecompile(): boolean {
+ if (_recompileInProgress) return false;
+ _recompileInProgress = true;
+ return true;
+ }
+
+ /** Release the recompile lock — always call in a finally block. */
+ export function endRecompile(): void {
+ _recompileInProgress = false;
+ }
+
+ /** Test-only helper to reset module state between cases. */
+ export function __resetDigestSyncState(): void {
+ _recompileInProgress = false;
+ }
+
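A minimal sketch of the lock protocol these helpers imply (acquire with `tryBeginRecompile`, release in `finally`); the `scheduleRecompile` wrapper and its argument plumbing are illustrative, not part of the package.

import { tryBeginRecompile, endRecompile, recompileDigest, type RecompileDigestInput } from './digest-sync.js';

// Hypothetical fire-and-forget scheduler that honors the per-process guard.
export function scheduleRecompile(input: RecompileDigestInput): void {
  if (!tryBeginRecompile()) return; // a recompilation is already running
  void recompileDigest(input).finally(() => endRecompile());
}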
+ // ---------------------------------------------------------------------------
+ // Pure staleness + guard evaluation
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Combine staleness + guard checks into one decision.
+ *
+ * The caller still needs to consult `isRecompileInProgress()` before firing
+ * the background task — this function is purely about the digest's age.
+ */
+ export function evaluateDigestState(input: EvaluateDigestStateInput): DigestState {
+ const stale = isDigestStale(input.digestVersion, input.currentMaxCreatedAt);
+ if (!stale) return { stale: false, recompile: false };
+ const recompile = shouldRecompile({
+ countNewClaims: input.countNewClaims,
+ hoursSinceCompilation: input.hoursSinceCompilation,
+ });
+ return { stale: true, recompile };
+ }
+
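A worked example of the decision, assuming `isDigestStale` treats any claim newer than the digest version as stale and `shouldRecompile` implements the >=10-claims-or->=24h guard described in the header comment (both live in claims-helper and are not shown in this diff); the numbers are made up.

import { evaluateDigestState } from './digest-sync.js';

// Digest version (compile time) 1_700_000_000; one newer claim; 30h elapsed.
const state = evaluateDigestState({
  digestVersion: 1_700_000_000,
  currentMaxCreatedAt: 1_700_003_600, // newest claim postdates the digest
  countNewClaims: 1,
  hoursSinceCompilation: 30,
});
// Under those assumptions: { stale: true, recompile: true }, because the
// 24-hour guard fires even though fewer than 10 new claims were seen.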
+ // ---------------------------------------------------------------------------
+ // Core compilation (pure, no I/O)
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Compile a Digest JSON from an array of Claim JSON.
+ *
+ * - `mode === 'template'` or `llmFn === null` → template path.
+ * - `mode === 'on'` with a non-null `llmFn` → LLM path with template fallback.
+ * Any parsing/assembly/LLM failure logs a warning and falls back silently.
+ * - Claim count above DIGEST_CLAIM_CAP forces the template path regardless
+ * of mode, to keep LLM token cost bounded.
+ *
+ * Returns the Digest JSON as produced by the WASM core.
+ */
+ export async function compileDigestCore(input: CompileDigestCoreInput): Promise<string> {
+ const { claimsJson, nowUnixSeconds, mode, llmFn, logger } = input;
+ const core = getWasm();
+ const nowBig = BigInt(Math.floor(nowUnixSeconds));
+
+ // Check whether we should even attempt the LLM path.
+ let useLlm = mode === 'on' && llmFn !== null;
+ if (useLlm) {
+ try {
+ const parsedClaims = JSON.parse(claimsJson);
+ if (!Array.isArray(parsedClaims) || parsedClaims.length === 0) {
+ useLlm = false;
+ } else if (parsedClaims.length > DIGEST_CLAIM_CAP) {
+ logger.info(
+ `Digest: ${parsedClaims.length} active claims > cap ${DIGEST_CLAIM_CAP}; using template path`,
+ );
+ useLlm = false;
+ }
+ } catch {
+ useLlm = false;
+ }
+ }
+
+ if (useLlm && llmFn) {
+ try {
+ const prompt = core.buildDigestPrompt(claimsJson);
+ const raw = await llmFn(prompt);
+ if (!raw || typeof raw !== 'string' || raw.trim().length === 0) {
+ throw new Error('LLM returned empty response');
+ }
+ const parsedResponse = core.parseDigestResponse(raw);
+ return core.assembleDigestFromLlm(parsedResponse, claimsJson, nowBig);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest LLM compilation failed, falling back to template: ${msg}`);
+ // fall through to template path
+ }
+ }
+
+ return core.buildTemplateDigest(claimsJson, nowBig);
+ }
+
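A sketch of supplying an `llmFn`; the HTTP endpoint, request payload, and response field are placeholders, and only the `compileDigestCore` contract (throw on failure, fall back to the template path) comes from this file.

import { compileDigestCore } from './digest-sync.js';

declare const claimsJson: string; // normally produced by fetchAllActiveClaims

// Hypothetical llmFn: any HTTP error throws, which compileDigestCore catches
// and turns into a template-path fallback.
const llmFn = async (prompt: string): Promise<string> => {
  const res = await fetch('https://llm.example.invalid/v1/complete', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ prompt }),
  });
  if (!res.ok) throw new Error(`LLM HTTP ${res.status}`);
  return (await res.json()).text as string;
};

const digestJson = await compileDigestCore({
  claimsJson,
  nowUnixSeconds: Math.floor(Date.now() / 1000),
  mode: 'on',
  llmFn,
  logger: { info: (m) => console.error(m), warn: (m) => console.error(m) },
});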
+ // ---------------------------------------------------------------------------
+ // I/O helpers — subgraph reads
+ // ---------------------------------------------------------------------------
+
+ /**
+ * Find the latest digest claim on the subgraph for this owner, decrypt it,
+ * and return the parsed Digest. Returns null when no digest exists, the
+ * subgraph query fails, or the blob is not decryptable.
+ *
+ * The callback injection keeps this module easy to test — index.ts passes
+ * in the real `searchSubgraph` + `decryptFromHex`, tests can pass fakes.
+ */
+ export interface LoadLatestDigestDeps {
+ searchSubgraph: (
+ owner: string,
+ trapdoors: string[],
+ maxCandidates: number,
+ authKeyHex: string,
+ ) => Promise<Array<{ id: string; encryptedBlob: string; createdAt?: string; timestamp?: string }>>;
+ decryptFromHex: (hex: string, key: Buffer) => string;
+ }
+
+ export async function loadLatestDigest(
+ owner: string,
+ authKeyHex: string,
+ encryptionKey: Buffer,
+ deps: LoadLatestDigestDeps,
+ logger: DigestLogger,
+ ): Promise<LoadedDigest | null> {
+ let results: Awaited<ReturnType<typeof deps.searchSubgraph>>;
+ try {
+ results = await deps.searchSubgraph(owner, [DIGEST_TRAPDOOR], 10, authKeyHex);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest: subgraph query failed: ${msg}`);
+ return null;
+ }
+ if (!results || results.length === 0) return null;
+
+ // Pick the highest createdAt (client-generated Unix seconds). Fall back to
+ // timestamp (block time) when createdAt is missing.
+ let best: { id: string; encryptedBlob: string; createdAt: number } | null = null;
+ for (const r of results) {
+ const createdAt = parseInt(r.createdAt ?? r.timestamp ?? '0', 10) || 0;
+ if (!best || createdAt > best.createdAt) {
+ best = { id: r.id, encryptedBlob: r.encryptedBlob, createdAt };
+ }
+ }
+ if (!best) return null;
+
+ try {
+ const decrypted = deps.decryptFromHex(best.encryptedBlob, encryptionKey);
+ const canonical = getWasm().parseClaimOrLegacy(decrypted);
+ const digest = extractDigestFromClaim(canonical);
+ if (!digest) {
+ logger.warn(`Digest: blob ${best.id.slice(0, 10)}… did not parse as a digest claim`);
+ return null;
+ }
+ return { digest, claimId: best.id, createdAt: best.createdAt };
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest: decrypt failed for ${best.id.slice(0, 10)}…: ${msg}`);
+ return null;
+ }
+ }
+
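Because the dependencies are injected, the error path is easy to exercise without a subgraph; a minimal sketch (fixture values are made up, and the WASM `parseClaimOrLegacy` step never runs on this path):

import { loadLatestDigest, type LoadLatestDigestDeps } from './digest-sync.js';

// Hypothetical fakes: a failing subgraph and an identity "decrypt".
const failingDeps: LoadLatestDigestDeps = {
  searchSubgraph: async () => { throw new Error('subgraph offline'); },
  decryptFromHex: (hex) => hex,
};

const warnings: string[] = [];
const loaded = await loadLatestDigest(
  '0xowner',
  'auth-key-hex',
  Buffer.alloc(32),
  failingDeps,
  { info: () => {}, warn: (m) => warnings.push(m) },
);
// loaded === null; warnings[0] starts with "Digest: subgraph query failed".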
+ /**
+ * Probe the subgraph for recency + new-claim signals in one query.
+ *
+ * Fetches the 10 most-recent active facts (sorted by timestamp DESC from the
+ * broadened query), reads each row's `createdAt` (client-generated Unix
+ * seconds), and returns:
+ *
+ * - `maxCreatedAt`: the largest createdAt across the 10 rows (or 0 if none)
+ * - `countNewerThan(digestVersion)`: how many of the 10 have createdAt
+ * strictly greater than the digest's version; clamped at 10 by design
+ * (one query, one answer)
+ *
+ * That's enough to drive the §15.10 recompile guard: the 10-claim threshold
+ * is exactly what the single query measures. Any user with more than 10 new
+ * claims still trips the guard (we just saturate at 10 instead of knowing
+ * the exact count, which doesn't matter — the guard fires either way).
+ */
+ export interface DigestRecencyProbe {
+ maxCreatedAt: number;
+ countNewerThan(digestVersion: number): number;
+ }
+
+ export interface GetDigestRecencyProbeDeps {
+ searchSubgraphBroadened: (
+ owner: string,
+ maxCandidates: number,
+ authKeyHex: string,
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ ) => Promise<Array<any>>;
+ }
+
+ /** How many recent facts we fetch to drive the recompile guard. */
+ export const DIGEST_RECENCY_PROBE_LIMIT = 10;
+
+ export async function getDigestRecencyProbe(
+ owner: string,
+ authKeyHex: string,
+ deps: GetDigestRecencyProbeDeps,
+ ): Promise<DigestRecencyProbe> {
+ let results: Array<{ createdAt?: string; timestamp?: string }> = [];
+ try {
+ results = await deps.searchSubgraphBroadened(owner, DIGEST_RECENCY_PROBE_LIMIT, authKeyHex);
+ } catch {
+ return { maxCreatedAt: 0, countNewerThan: () => 0 };
+ }
+ if (!results || results.length === 0) {
+ return { maxCreatedAt: 0, countNewerThan: () => 0 };
+ }
+
+ const createdAts: number[] = [];
+ for (const r of results) {
+ const ts = parseInt(r.createdAt ?? r.timestamp ?? '0', 10);
+ if (!Number.isNaN(ts) && ts > 0) createdAts.push(ts);
+ }
+ const maxCreatedAt = createdAts.length > 0 ? Math.max(...createdAts) : 0;
+
+ return {
+ maxCreatedAt,
+ countNewerThan(digestVersion: number): number {
+ let n = 0;
+ for (const ca of createdAts) if (ca > digestVersion) n++;
+ return n;
+ },
+ };
+ }
+
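The saturation behavior described above can be seen directly with a fake broadened search; the fixture values are made up, and only code shown in this file is exercised.

import { getDigestRecencyProbe } from './digest-sync.js';

// Hypothetical fake returning 10 rows, all with client-generated createdAt.
const probe = await getDigestRecencyProbe('0xowner', 'auth-key-hex', {
  searchSubgraphBroadened: async () =>
    Array.from({ length: 10 }, (_, i) => ({ createdAt: String(1_700_000_100 + i) })),
});

probe.maxCreatedAt;                  // 1700000109
probe.countNewerThan(1_700_000_000); // 10 (saturated; the true count may be larger)
probe.countNewerThan(1_700_000_105); // 4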
+ /**
+ * Fetch all active claims for this owner (up to limit), decrypt each,
+ * parse as canonical Claim, and filter out infrastructure claims
+ * (digest and entity categories) so only user-facing memories remain.
+ *
+ * Returns the surviving claims serialized as a single JSON array string,
+ * suitable for passing directly as `claimsJson` to `compileDigestCore`
+ * (and on to `buildDigestPrompt` / `buildTemplateDigest`).
+ */
+ export interface FetchAllActiveClaimsDeps {
+ searchSubgraphBroadened: (
+ owner: string,
+ maxCandidates: number,
+ authKeyHex: string,
+ ) => Promise<Array<{ id: string; encryptedBlob: string; isActive?: boolean }>>;
+ decryptFromHex: (hex: string, key: Buffer) => string;
+ }
+
+ export async function fetchAllActiveClaims(
+ owner: string,
+ authKeyHex: string,
+ encryptionKey: Buffer,
+ limit: number,
+ deps: FetchAllActiveClaimsDeps,
+ logger: DigestLogger,
+ ): Promise<string> {
+ let rows: Awaited<ReturnType<typeof deps.searchSubgraphBroadened>>;
+ try {
+ rows = await deps.searchSubgraphBroadened(owner, limit, authKeyHex);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest: fetchAllActiveClaims subgraph query failed: ${msg}`);
+ return '[]';
+ }
+ if (!rows || rows.length === 0) return '[]';
+
+ const claimsOut: unknown[] = [];
+ for (const row of rows) {
+ if (row.isActive === false) continue;
+ try {
+ const decrypted = deps.decryptFromHex(row.encryptedBlob, encryptionKey);
+ const canonicalJson = getWasm().parseClaimOrLegacy(decrypted);
+ const claim = JSON.parse(canonicalJson) as { c?: string };
+ // Skip infrastructure claims — digest and entity records aren't user memories.
+ if (claim.c === 'dig' || claim.c === 'ent') continue;
+ claimsOut.push(claim);
+ } catch {
+ // Skip un-decryptable / un-parseable rows. Don't fail the whole compilation.
+ }
+ }
+ return JSON.stringify(claimsOut);
+ }
+
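A sketch of the category filter with fake rows; it assumes the WASM `parseClaimOrLegacy` passes already-canonical claim JSON through unchanged, which this diff does not show.

import { fetchAllActiveClaims } from './digest-sync.js';

const claimsJson = await fetchAllActiveClaims(
  '0xowner',
  'auth-key-hex',
  Buffer.alloc(32),
  200, // limit
  {
    // Hypothetical rows; the identity decryptFromHex treats each blob as plaintext.
    searchSubgraphBroadened: async () => [
      { id: '0x01', encryptedBlob: '{"c":"pref"}' },             // kept
      { id: '0x02', encryptedBlob: '{"c":"dig"}' },              // dropped: digest claim
      { id: '0x03', encryptedBlob: 'garbage', isActive: false }, // dropped: inactive
    ],
    decryptFromHex: (hex) => hex,
  },
  { info: () => {}, warn: () => {} },
);
// Under that assumption, claimsJson === '[{"c":"pref"}]'.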
+ // ---------------------------------------------------------------------------
+ // Build + inject logic (the full read path pipeline used by the hook)
+ // ---------------------------------------------------------------------------
+
+ export interface RecompileDigestDeps {
+ /** Called with the canonical Claim JSON string of the new digest. */
+ storeDigestClaim: (canonicalClaimJson: string, compiledAt: string) => Promise<void>;
+ /** Tombstone the previous digest (best-effort; failures are non-fatal). */
+ tombstoneDigest: (claimId: string) => Promise<void>;
+ fetchAllActiveClaimsFn: () => Promise<string>;
+ /** LLM invocation, or null when no LLM is configured. */
+ llmFn: ((prompt: string) => Promise<string>) | null;
+ }
+
+ export interface RecompileDigestInput {
+ mode: DigestMode;
+ previousClaimId: string | null;
+ nowUnixSeconds: number;
+ deps: RecompileDigestDeps;
+ logger: DigestLogger;
+ }
+
+ /**
+ * Full recompile pipeline. Safe to fire-and-forget (never throws).
+ *
+ * Steps:
+ * 1. Fetch all active claims (decrypted, filtered to user-facing categories)
+ * 2. Compile via template or LLM (with template fallback)
+ * 3. Wrap as a canonical Claim and encrypt + store on-chain
+ * 4. Tombstone the previous digest (if any) so only one digest stays indexed
+ *
+ * The caller should call `tryBeginRecompile` before scheduling and
+ * `endRecompile` in a finally. This function does not touch the guard itself.
+ */
+ export async function recompileDigest(input: RecompileDigestInput): Promise<void> {
+ const { mode, previousClaimId, nowUnixSeconds, deps, logger } = input;
+ try {
+ const claimsJson = await deps.fetchAllActiveClaimsFn();
+ const digestJson = await compileDigestCore({
+ claimsJson,
+ nowUnixSeconds,
+ mode,
+ llmFn: deps.llmFn,
+ logger,
+ });
+ const compiledAt = new Date(nowUnixSeconds * 1000).toISOString();
+ const canonical = buildDigestClaim({ digestJson, compiledAt });
+ await deps.storeDigestClaim(canonical, compiledAt);
+ if (previousClaimId) {
+ try {
+ await deps.tombstoneDigest(previousClaimId);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest: tombstone of previous ${previousClaimId.slice(0, 10)}… failed: ${msg}`);
+ }
+ }
+ logger.info(`Digest: recompiled and stored (compiledAt=${compiledAt})`);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ logger.warn(`Digest: recompile failed: ${msg}`);
+ }
+ }
+
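A sketch of how a host (e.g. index.ts) might assemble `RecompileDigestDeps`; every `declare`d name is a placeholder for host-side plumbing that this diff does not show.

import {
  fetchAllActiveClaims,
  recompileDigest,
  type DigestLogger,
  type FetchAllActiveClaimsDeps,
  type RecompileDigestDeps,
} from './digest-sync.js';

declare function storeEncryptedClaim(canonicalClaimJson: string): Promise<void>; // placeholder
declare function tombstoneFact(claimId: string): Promise<void>;                  // placeholder
declare const owner: string, authKeyHex: string, encryptionKey: Buffer;
declare const subgraphDeps: FetchAllActiveClaimsDeps;
declare const logger: DigestLogger;

const deps: RecompileDigestDeps = {
  storeDigestClaim: (canonicalClaimJson) => storeEncryptedClaim(canonicalClaimJson),
  tombstoneDigest: (claimId) => tombstoneFact(claimId),
  fetchAllActiveClaimsFn: () =>
    fetchAllActiveClaims(owner, authKeyHex, encryptionKey, 200, subgraphDeps, logger),
  llmFn: null, // template-only compilation when no LLM is configured
};

await recompileDigest({
  mode: 'template',
  previousClaimId: null,
  nowUnixSeconds: Math.floor(Date.now() / 1000),
  deps,
  logger,
});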
+ // ---------------------------------------------------------------------------
+ // Top-level entry: maybeInjectDigest
+ // ---------------------------------------------------------------------------
+
+ export interface MaybeInjectDigestInput {
+ owner: string;
+ authKeyHex: string;
+ encryptionKey: Buffer;
+ mode: DigestMode;
+ nowMs: number;
+ loadDeps: LoadLatestDigestDeps;
+ probeDeps: GetDigestRecencyProbeDeps;
+ /** Fired (fire-and-forget) if the guard + state say so. */
+ recompileFn: (previousClaimId: string | null) => void;
+ logger: DigestLogger;
+ }
+
+ export interface MaybeInjectDigestResult {
+ /** When non-null, the caller injects this string into `## Your Memory`. */
+ promptText: string | null;
+ /** For debugging / session debrief. */
+ state: 'off' | 'fresh' | 'stale' | 'first-compile' | 'no-llm-yet';
+ }
+
+ /**
+ * Top-level read path helper. Decides whether to return a promptText from
+ * the latest digest, kicks off a background recompile when appropriate,
+ * and never throws.
+ *
+ * If this returns `{ promptText: null }`, the caller must fall back to the
+ * legacy individual-fact search path — digest injection is a fast path, not
+ * a replacement.
+ */
+ export async function maybeInjectDigest(
+ input: MaybeInjectDigestInput,
+ ): Promise<MaybeInjectDigestResult> {
+ const {
+ owner,
+ authKeyHex,
+ encryptionKey,
+ mode,
+ nowMs,
+ loadDeps,
+ probeDeps,
+ recompileFn,
+ logger,
+ } = input;
+
+ if (mode === 'off') {
+ return { promptText: null, state: 'off' };
+ }
+
+ // Fetch the latest digest and the recency probe in parallel.
+ const [loaded, probe] = await Promise.all([
+ loadLatestDigest(owner, authKeyHex, encryptionKey, loadDeps, logger),
+ getDigestRecencyProbe(owner, authKeyHex, probeDeps),
+ ]);
+
+ if (!loaded) {
+ // No digest exists yet — schedule a first compile, fall back to legacy search.
+ if (!isRecompileInProgress()) {
+ recompileFn(null);
+ }
+ return { promptText: null, state: 'first-compile' };
+ }
+
+ const digestVersion = typeof loaded.digest.version === 'number'
+ ? loaded.digest.version
+ : parseInt(String(loaded.digest.version ?? 0), 10) || 0;
+ const compiledAt = typeof loaded.digest.compiled_at === 'string' ? loaded.digest.compiled_at : '';
+
+ const state = evaluateDigestState({
+ digestVersion,
+ currentMaxCreatedAt: probe.maxCreatedAt,
+ countNewClaims: probe.countNewerThan(digestVersion),
+ hoursSinceCompilation: hoursSince(compiledAt, nowMs),
+ });
+
+ if (state.stale && state.recompile && !isRecompileInProgress()) {
+ recompileFn(loaded.claimId);
+ }
+
+ const promptText = typeof loaded.digest.prompt_text === 'string' ? loaded.digest.prompt_text : null;
+ return {
+ promptText,
+ state: state.stale ? 'stale' : 'fresh',
+ };
+ }
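A sketch of the read path from a host hook; the hook name, the fallback function, and the prompt assembly are placeholders, while the null-means-fall-back contract comes from the comment above.

import { maybeInjectDigest, type MaybeInjectDigestInput } from './digest-sync.js';

declare const input: MaybeInjectDigestInput;           // assembled by the host plugin
declare function legacyFactSearch(): Promise<string>;  // placeholder fallback path

// before_agent_start (hypothetical wiring):
const result = await maybeInjectDigest(input);
const memorySection = result.promptText ?? (await legacyFactSearch());
// memorySection is then injected under "## Your Memory"; result.state can be
// logged for the session debrief.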
package/embedding.ts CHANGED
@@ -1,75 +1,98 @@
  /**
  * TotalReclaw Plugin - Local Embedding via @huggingface/transformers
  *
- * Uses the Qwen3-Embedding-0.6B ONNX model to generate 1024-dimensional
- * text embeddings locally. No API key needed, no data leaves the machine.
- * Supports 100+ languages (EN, PT, ES, ZH, etc.).
+ * Generates text embeddings locally using an ONNX model. No API key needed,
+ * no data leaves the machine. Preserves the E2EE guarantee.
  *
- * This preserves the E2EE guarantee: embeddings are generated
- * CLIENT-SIDE before encryption, so no plaintext ever reaches an external API.
+ * Locked to Harrier-OSS-v1-270M (640d, q4, ~344MB, pre-pooled). Changing the
+ * embedding model breaks search across an existing vault, so the
+ * `TOTALRECLAW_EMBEDDING_MODEL` user-facing env var was removed in v1.
  *
- * Model details:
- * - Quantized (int8) ONNX model: ~600MB download on first use
- * - Cached in ~/.cache/huggingface/ after first download
- * - Lazy initialization: first call ~3-5s (model load), subsequent ~100ms
- * - Output: 1024-dimensional normalized embedding vector
- * - No instruction prefix needed (bare queries perform better)
- *
- * Dependencies: @huggingface/transformers (handles model download,
- * tokenization, ONNX inference, last-token pooling, and normalization).
+ * Dependencies: @huggingface/transformers
  */

  // @ts-ignore - @huggingface/transformers types may not be perfect
- import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
+ import { AutoTokenizer, AutoModel, pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';

- /** ONNX-optimized Qwen3-Embedding-0.6B from HuggingFace Hub. */
- const MODEL_ID = 'onnx-community/Qwen3-Embedding-0.6B-ONNX';
+ interface ModelConfig {
+ id: string;
+ dims: number;
+ /** 'sentence_embedding' for models with pre-pooled output, 'mean'/'last_token' for pipeline models */
+ pooling: string;
+ size: string;
+ /** ONNX quantization dtype. Must match an available variant in the HF repo. */
+ dtype: string;
+ }

- /** Fixed output dimensionality for Qwen3-Embedding-0.6B. */
- const EMBEDDING_DIM = 1024;
+ const HARRIER_MODEL: ModelConfig = {
+ id: 'onnx-community/harrier-oss-v1-270m-ONNX',
+ dims: 640,
+ pooling: 'sentence_embedding',
+ size: '~344MB',
+ dtype: 'q4',
+ };

- /** Lazily initialized feature extraction pipeline. */
- let extractor: FeatureExtractionPipeline | null = null;
+ function getModelConfig(): ModelConfig {
+ return HARRIER_MODEL;
+ }
+
+ /** Lazily initialized model instances. */
+ let pipelineExtractor: FeatureExtractionPipeline | null = null;
+ let autoTokenizer: any = null;
+ let autoModel: any = null;
+ let activeModel: ModelConfig | null = null;

  /**
- * Generate a 1024-dimensional embedding vector for the given text.
+ * Generate an embedding vector for the given text.
  *
- * On first call, downloads and loads the ONNX model (~600MB, cached).
+ * On first call, downloads and loads the ONNX model (cached after download).
  * Subsequent calls reuse the loaded model and run in ~100ms.
- *
- * The isQuery option is accepted for forward compatibility but does not
- * change behavior -- Qwen3 performs better without instruction prefixes.
- *
- * @param text - The text to embed.
- * @param options - Optional settings.
- * @param options.isQuery - Accepted for forward compatibility (no-op).
- * @returns 1024-dimensional normalized embedding as a number array.
  */
  export async function generateEmbedding(
  text: string,
  options?: { isQuery?: boolean },
  ): Promise<number[]> {
- if (!extractor) {
- console.log('Downloading embedding model (one-time setup, ~600MB)...');
- extractor = await pipeline('feature-extraction', MODEL_ID, {
- quantized: true,
- });
- console.log('Embedding model ready.');
+ if (!activeModel) {
+ activeModel = getModelConfig();
+ console.error(`[TotalReclaw] Downloading embedding model (${activeModel.size}, one-time setup)...`);
+ console.error('[TotalReclaw] This enables semantic search across your encrypted memories.');
+
+ if (activeModel.pooling === 'sentence_embedding') {
+ // Harrier: use AutoModel (pipeline doesn't support sentence_embedding output)
+ autoTokenizer = await AutoTokenizer.from_pretrained(activeModel.id);
+ autoModel = await AutoModel.from_pretrained(activeModel.id, {
+ dtype: activeModel.dtype as any,
+ });
+ } else {
+ // e5-small / Qwen: use pipeline
+ pipelineExtractor = await pipeline('feature-extraction', activeModel.id, {
+ dtype: activeModel.dtype as any,
+ });
+ }
+ console.error('[TotalReclaw] Embedding model ready. Future startups will be instant.');
  }

- const input = text;
- const output = await extractor(input, { pooling: 'last_token', normalize: true });
- // output.data is a Float32Array; convert to plain number[]
- return Array.from(output.data as Float32Array);
+ const model = activeModel!;
+
+ if (model.pooling === 'sentence_embedding') {
+ // Harrier: pre-pooled, pre-normalized output
+ const inputs = await autoTokenizer(text, { return_tensors: 'pt', padding: true });
+ const output = await autoModel(inputs);
+ return Array.from(output.sentence_embedding.data as Float32Array);
+ } else {
+ // Pipeline models: use pooling option
+ const input = model.pooling === 'mean' && options?.isQuery
+ ? `query: ${text}`
+ : text;
+ const output = await pipelineExtractor!(input, { pooling: model.pooling as any, normalize: true });
+ return Array.from(output.data as Float32Array);
+ }
  }
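A brief usage sketch; since the Harrier path returns pre-normalized vectors (per the comment above), cosine similarity reduces to a dot product. The similarity computation is illustrative, only `generateEmbedding` and `getEmbeddingDims` come from this file.

import { generateEmbedding, getEmbeddingDims } from './embedding.js';

const a = await generateEmbedding('favorite editor is Neovim');
const b = await generateEmbedding('which editor does the user prefer?', { isQuery: true });

console.error(getEmbeddingDims()); // 640
const similarity = a.reduce((sum, v, i) => sum + v * b[i], 0); // dot product of unit vectors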

  /**
  * Get the embedding vector dimensionality.
- *
- * Always returns 1024 (fixed for Qwen3-Embedding-0.6B).
- * This is needed by downstream code (e.g. LSH hasher) to know the vector
- * size without calling the embedding model.
+ * Always returns 640: the embedding model is locked to Harrier-OSS-v1-270M (see the header comment).
  */
  export function getEmbeddingDims(): number {
- return EMBEDDING_DIM;
+ return getModelConfig().dims;
  }