@loreai/core 0.12.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +29 -8
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +1 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +29 -0
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +15 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +53 -5
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -4
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +696 -243
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/pattern-extract.d.ts +36 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +1 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +13 -1
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/types.d.ts +41 -1
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +22 -0
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +29 -8
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +1 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +29 -0
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +15 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +53 -5
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -4
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +696 -243
- package/dist/node/index.js.map +4 -4
- package/dist/node/pattern-extract.d.ts +36 -0
- package/dist/node/pattern-extract.d.ts.map +1 -0
- package/dist/node/recall.d.ts +1 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +13 -1
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/types.d.ts +41 -1
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +22 -0
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +29 -8
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +29 -0
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +15 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +53 -5
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +36 -0
- package/dist/types/pattern-extract.d.ts.map +1 -0
- package/dist/types/recall.d.ts +1 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +13 -1
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -1
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +22 -0
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/agents-file.ts +111 -28
- package/src/config.ts +25 -18
- package/src/curator.ts +2 -2
- package/src/db.ts +19 -2
- package/src/distillation.ts +152 -15
- package/src/embedding.ts +158 -14
- package/src/gradient.ts +398 -227
- package/src/index.ts +13 -5
- package/src/pattern-extract.ts +108 -0
- package/src/recall.ts +124 -6
- package/src/search.ts +37 -1
- package/src/types.ts +41 -1
- package/src/worker-model.ts +142 -5
package/src/distillation.ts
CHANGED
|
@@ -3,7 +3,9 @@ import { config } from "./config";
|
|
|
3
3
|
import * as temporal from "./temporal";
|
|
4
4
|
import { CHUNK_TERMINATOR } from "./temporal";
|
|
5
5
|
import * as embedding from "./embedding";
|
|
6
|
+
import * as ltm from "./ltm";
|
|
6
7
|
import * as log from "./log";
|
|
8
|
+
import { extractPatterns } from "./pattern-extract";
|
|
7
9
|
import {
|
|
8
10
|
DISTILLATION_SYSTEM,
|
|
9
11
|
distillationUser,
|
|
@@ -328,6 +330,10 @@ export type Distillation = {
|
|
|
328
330
|
generation: number;
|
|
329
331
|
token_count: number;
|
|
330
332
|
created_at: number;
|
|
333
|
+
/** k/√N compression ratio. NULL for pre-v12 rows or meta-distillations. */
|
|
334
|
+
r_compression: number | null;
|
|
335
|
+
/** Temporal clustering [0,1]. NULL for pre-v12 rows or meta-distillations. */
|
|
336
|
+
c_norm: number | null;
|
|
331
337
|
};
|
|
332
338
|
|
|
333
339
|
/**
|
|
@@ -351,8 +357,8 @@ export function loadForSession(
|
|
|
351
357
|
): Distillation[] {
|
|
352
358
|
const pid = ensureProject(projectPath);
|
|
353
359
|
const sql = includeArchived
|
|
354
|
-
? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
|
|
355
|
-
: "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
360
|
+
? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
|
|
361
|
+
: "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
356
362
|
const rows = db()
|
|
357
363
|
.query(sql)
|
|
358
364
|
.all(pid, sessionID) as Array<{
|
|
@@ -364,6 +370,8 @@ export function loadForSession(
|
|
|
364
370
|
generation: number;
|
|
365
371
|
token_count: number;
|
|
366
372
|
created_at: number;
|
|
373
|
+
r_compression: number | null;
|
|
374
|
+
c_norm: number | null;
|
|
367
375
|
}>;
|
|
368
376
|
return rows.map((r) => ({
|
|
369
377
|
...r,
|
|
@@ -377,6 +385,8 @@ function storeDistillation(input: {
|
|
|
377
385
|
observations: string;
|
|
378
386
|
sourceIDs: string[];
|
|
379
387
|
generation: number;
|
|
388
|
+
rCompression?: number;
|
|
389
|
+
cNorm?: number;
|
|
380
390
|
}): string {
|
|
381
391
|
const pid = ensureProject(input.projectPath);
|
|
382
392
|
const id = crypto.randomUUID();
|
|
@@ -384,8 +394,8 @@ function storeDistillation(input: {
|
|
|
384
394
|
const tokens = Math.ceil(input.observations.length / 3);
|
|
385
395
|
db()
|
|
386
396
|
.query(
|
|
387
|
-
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
|
|
388
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
397
|
+
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
|
|
398
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
389
399
|
)
|
|
390
400
|
.run(
|
|
391
401
|
id,
|
|
@@ -398,6 +408,8 @@ function storeDistillation(input: {
|
|
|
398
408
|
input.generation,
|
|
399
409
|
tokens,
|
|
400
410
|
Date.now(),
|
|
411
|
+
input.rCompression ?? null,
|
|
412
|
+
input.cNorm ?? null,
|
|
401
413
|
);
|
|
402
414
|
return id;
|
|
403
415
|
}
|
|
@@ -420,7 +432,7 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
|
|
|
420
432
|
const pid = ensureProject(projectPath);
|
|
421
433
|
const rows = db()
|
|
422
434
|
.query(
|
|
423
|
-
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
|
|
435
|
+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
|
|
424
436
|
)
|
|
425
437
|
.all(pid, sessionID) as Array<{
|
|
426
438
|
id: string;
|
|
@@ -431,6 +443,8 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
|
|
|
431
443
|
generation: number;
|
|
432
444
|
token_count: number;
|
|
433
445
|
created_at: number;
|
|
446
|
+
r_compression: number | null;
|
|
447
|
+
c_norm: number | null;
|
|
434
448
|
}>;
|
|
435
449
|
return rows.map((r) => ({
|
|
436
450
|
...r,
|
|
@@ -514,6 +528,17 @@ export async function run(input: {
|
|
|
514
528
|
model?: { providerID: string; modelID: string };
|
|
515
529
|
/** Skip minMessages threshold check — distill whatever is pending */
|
|
516
530
|
force?: boolean;
|
|
531
|
+
/** Skip meta-distillation even when gen-0 count exceeds the threshold.
|
|
532
|
+
* Used when the upstream prompt cache is likely still warm — meta-distillation
|
|
533
|
+
* rewrites distillation row IDs, which invalidates the distilled prefix cache
|
|
534
|
+
* and causes a cache bust on the next turn. Callers should set this to true
|
|
535
|
+
* when `Date.now() - getLastTurnAt(sessionID) < cacheTTL`. */
|
|
536
|
+
skipMeta?: boolean;
|
|
537
|
+
/** When true, all LLM calls in this run are marked urgent and bypass the
|
|
538
|
+
* batch queue (if one is active). Use for compaction and overflow recovery
|
|
539
|
+
* where the caller is blocking on the result. Background/idle distillation
|
|
540
|
+
* should leave this false to benefit from batch API 50% cost savings. */
|
|
541
|
+
urgent?: boolean;
|
|
517
542
|
}): Promise<{ rounds: number; distilled: number }> {
|
|
518
543
|
// Reset orphaned messages (marked distilled by a deleted/migrated distillation)
|
|
519
544
|
const orphans = resetOrphans(input.projectPath, input.sessionID);
|
|
@@ -547,6 +572,7 @@ export async function run(input: {
|
|
|
547
572
|
sessionID: input.sessionID,
|
|
548
573
|
messages: segment,
|
|
549
574
|
model: input.model,
|
|
575
|
+
urgent: input.urgent,
|
|
550
576
|
});
|
|
551
577
|
if (result) {
|
|
552
578
|
distilled += segment.length;
|
|
@@ -555,8 +581,11 @@ export async function run(input: {
|
|
|
555
581
|
}
|
|
556
582
|
}
|
|
557
583
|
|
|
558
|
-
// Check if meta-distillation is needed
|
|
584
|
+
// Check if meta-distillation is needed (skip when cache is warm to avoid
|
|
585
|
+
// prefix cache invalidation — row IDs change after meta-distill, busting
|
|
586
|
+
// the prompt cache on the next turn).
|
|
559
587
|
if (
|
|
588
|
+
!input.skipMeta &&
|
|
560
589
|
gen0Count(input.projectPath, input.sessionID) >=
|
|
561
590
|
cfg.distillation.metaThreshold
|
|
562
591
|
) {
|
|
@@ -565,6 +594,7 @@ export async function run(input: {
|
|
|
565
594
|
projectPath: input.projectPath,
|
|
566
595
|
sessionID: input.sessionID,
|
|
567
596
|
model: input.model,
|
|
597
|
+
urgent: input.urgent,
|
|
568
598
|
});
|
|
569
599
|
rounds++;
|
|
570
600
|
}
|
|
@@ -582,6 +612,7 @@ async function distillSegment(input: {
|
|
|
582
612
|
sessionID: string;
|
|
583
613
|
messages: TemporalMessage[];
|
|
584
614
|
model?: { providerID: string; modelID: string };
|
|
615
|
+
urgent?: boolean;
|
|
585
616
|
}): Promise<DistillationResult | null> {
|
|
586
617
|
const prior = latestObservations(input.projectPath, input.sessionID);
|
|
587
618
|
const text = messagesToText(input.messages);
|
|
@@ -604,29 +635,30 @@ async function distillSegment(input: {
|
|
|
604
635
|
const responseText = await input.llm.prompt(
|
|
605
636
|
DISTILLATION_SYSTEM,
|
|
606
637
|
userContent,
|
|
607
|
-
{ model, workerID: "lore-distill" },
|
|
638
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
|
|
608
639
|
);
|
|
609
640
|
if (!responseText) return null;
|
|
610
641
|
|
|
611
642
|
const result = parseDistillationResult(responseText);
|
|
612
643
|
if (!result) return null;
|
|
613
644
|
|
|
645
|
+
// Compute context health metrics before storing.
|
|
646
|
+
const distilledTokens = Math.ceil(result.observations.length / 3);
|
|
647
|
+
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
648
|
+
const rComp = compressionRatio(distilledTokens, sourceTokens);
|
|
649
|
+
const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
|
|
650
|
+
|
|
614
651
|
const distillId = storeDistillation({
|
|
615
652
|
projectPath: input.projectPath,
|
|
616
653
|
sessionID: input.sessionID,
|
|
617
654
|
observations: result.observations,
|
|
618
655
|
sourceIDs: input.messages.map((m) => m.id),
|
|
619
656
|
generation: 0,
|
|
657
|
+
rCompression: rComp,
|
|
658
|
+
cNorm,
|
|
620
659
|
});
|
|
621
660
|
temporal.markDistilled(input.messages.map((m) => m.id));
|
|
622
661
|
|
|
623
|
-
// Diagnostic: log compression health and temporal clustering metrics.
|
|
624
|
-
// R_compression (k/√N): < 1.0 signals likely lossy distillation.
|
|
625
|
-
// C_norm: 0 = uniform timestamps, 1 = dominated by distant past.
|
|
626
|
-
const distilledTokens = Math.ceil(result.observations.length / 3);
|
|
627
|
-
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
628
|
-
const rComp = compressionRatio(distilledTokens, sourceTokens);
|
|
629
|
-
const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
|
|
630
662
|
log.info(
|
|
631
663
|
`distill segment: ${input.messages.length} msgs, ` +
|
|
632
664
|
`${sourceTokens}→${distilledTokens} tokens, ` +
|
|
@@ -638,6 +670,24 @@ async function distillSegment(input: {
|
|
|
638
670
|
embedding.embedDistillation(distillId, result.observations);
|
|
639
671
|
}
|
|
640
672
|
|
|
673
|
+
// Fire-and-forget: extract decision/preference patterns → knowledge entries
|
|
674
|
+
if (config().knowledge.enabled) {
|
|
675
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
676
|
+
try {
|
|
677
|
+
ltm.create({
|
|
678
|
+
projectPath: input.projectPath,
|
|
679
|
+
category: pat.category,
|
|
680
|
+
title: pat.title,
|
|
681
|
+
content: pat.content,
|
|
682
|
+
session: input.sessionID,
|
|
683
|
+
scope: "project",
|
|
684
|
+
});
|
|
685
|
+
} catch {
|
|
686
|
+
// Dedup guard in ltm.create() handles duplicates — swallow errors
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
|
|
641
691
|
return result;
|
|
642
692
|
}
|
|
643
693
|
|
|
@@ -654,6 +704,7 @@ export async function metaDistill(input: {
|
|
|
654
704
|
projectPath: string;
|
|
655
705
|
sessionID: string;
|
|
656
706
|
model?: { providerID: string; modelID: string };
|
|
707
|
+
urgent?: boolean;
|
|
657
708
|
}): Promise<DistillationResult | null> {
|
|
658
709
|
const existing = loadGen0(input.projectPath, input.sessionID);
|
|
659
710
|
|
|
@@ -681,7 +732,7 @@ export async function metaDistill(input: {
|
|
|
681
732
|
const responseText = await input.llm.prompt(
|
|
682
733
|
RECURSIVE_SYSTEM,
|
|
683
734
|
userContent,
|
|
684
|
-
{ model, workerID: "lore-distill" },
|
|
735
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
|
|
685
736
|
);
|
|
686
737
|
if (!responseText) return null;
|
|
687
738
|
|
|
@@ -732,5 +783,91 @@ export async function metaDistill(input: {
|
|
|
732
783
|
embedding.embedDistillation(metaId, result.observations);
|
|
733
784
|
}
|
|
734
785
|
|
|
786
|
+
// Fire-and-forget: extract decision/preference patterns → knowledge entries
|
|
787
|
+
if (config().knowledge.enabled) {
|
|
788
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
789
|
+
try {
|
|
790
|
+
ltm.create({
|
|
791
|
+
projectPath: input.projectPath,
|
|
792
|
+
category: pat.category,
|
|
793
|
+
title: pat.title,
|
|
794
|
+
content: pat.content,
|
|
795
|
+
session: input.sessionID,
|
|
796
|
+
scope: "project",
|
|
797
|
+
});
|
|
798
|
+
} catch {
|
|
799
|
+
// Dedup guard in ltm.create() handles duplicates — swallow errors
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
|
|
735
804
|
return result;
|
|
736
805
|
}
|
|
806
|
+
|
|
807
|
+
// ---------------------------------------------------------------------------
|
|
808
|
+
// Retroactive metric backfill
|
|
809
|
+
// ---------------------------------------------------------------------------
|
|
810
|
+
|
|
811
|
+
/**
 * Backfill `r_compression` and `c_norm` for distillations that were created
 * before schema v12 (or before PR #113 added the computation).
 *
 * For each distillation whose `r_compression` is NULL, loads the source
 * temporal messages listed in `source_ids`, computes `compressionRatio()` and
 * `temporalCnorm()` from them, and writes both values back.
 *
 * A row is skipped (left NULL) when:
 *   - `source_ids` parses to an empty list, or
 *   - any of its source messages can no longer be found (pruned). Metrics
 *     computed from only a subset of the sources would under-count the source
 *     tokens and therefore overstate the compression ratio, so a partial
 *     match is treated the same as a full miss.
 *
 * Designed to run once at startup — idempotent (only touches NULL rows).
 * Skipped rows stay NULL and are re-examined on the next call.
 *
 * @returns The number of rows updated.
 */
export function backfillMetrics(): number {
  const rows = db()
    .query(
      "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL",
    )
    .all() as Array<{
    id: string;
    source_ids: string;
    token_count: number;
  }>;

  if (!rows.length) return 0;

  const update = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?",
  );

  // Keep each IN (...) list well below SQLite's bound-parameter limit
  // (999 on builds older than 3.32.0) so very long source lists cannot fail.
  const MAX_IDS_PER_QUERY = 500;

  let updated = 0;

  for (const row of rows) {
    // De-duplicate so the "all sources found" comparison below is exact:
    // IN (...) returns each matching row once no matter how often an id repeats.
    const sourceIds = [...new Set(parseSourceIds(row.source_ids))];
    if (!sourceIds.length) continue;

    // Load source temporal messages — they may have been pruned.
    const sources: Array<{ tokens: number; created_at: number }> = [];
    for (let start = 0; start < sourceIds.length; start += MAX_IDS_PER_QUERY) {
      const chunk = sourceIds.slice(start, start + MAX_IDS_PER_QUERY);
      const placeholders = chunk.map(() => "?").join(",");
      const found = db()
        .query(
          `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`,
        )
        .all(...chunk) as Array<{ tokens: number; created_at: number }>;
      sources.push(...found);
    }

    // Require every source message to be present. NOTE(review): this assumes
    // temporal_messages.id is unique, so a full match yields exactly one row
    // per id — confirm against the schema.
    if (sources.length < sourceIds.length) continue;

    const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
    const timestamps = sources.map((s) => s.created_at);

    const rComp = compressionRatio(row.token_count, sourceTokens);
    const cNorm = temporal.temporalCnorm(timestamps);

    update.run(rComp, cNorm, row.id);
    updated++;
  }

  if (updated > 0) {
    log.info(
      `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped — missing sources)`,
    );
  }

  return updated;
}
|
package/src/embedding.ts
CHANGED
|
@@ -132,12 +132,82 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Local provider (fastembed + ONNX Runtime)
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
/**
 * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
 *
 * No API key required — runs entirely on-device via ONNX Runtime.
 * Model files are downloaded on first use (~33MB) and cached in
 * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
 *
 * Uses dynamic import so the module is only loaded when the "local"
 * provider is actually selected — avoids startup cost and allows
 * graceful fallback if fastembed is not installed. Because the import is
 * lazy, a missing fastembed package surfaces as a rejected promise from
 * `embed()`, not as an exception from the constructor.
 */
class LocalProvider implements EmbeddingProvider {
  readonly maxBatchSize = 256;
  private model: unknown | null = null;
  private initPromise: Promise<unknown> | null = null;
  private modelName: string;

  /** @param modelName `EmbeddingModel` enum key (e.g. "BGESmallENV15") or a raw model name. */
  constructor(modelName: string) {
    this.modelName = modelName;
  }

  /**
   * Lazily load fastembed and initialise the model exactly once.
   *
   * Concurrent callers share the same in-flight promise. If initialisation
   * fails (package missing, model download error, ...) the cached promise is
   * cleared so a later call can retry instead of replaying the same rejection
   * for the lifetime of the process.
   */
  private async getModel(): Promise<unknown> {
    if (this.model) return this.model;
    if (!this.initPromise) {
      this.initPromise = (async () => {
        try {
          const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
          // Map config model string to EmbeddingModel enum value.
          // If the configured model matches an enum key, use it; otherwise pass
          // the raw string through as the model name.
          const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
          // fastembed's init() has overloaded signatures expecting specific enum
          // members, but the model is resolved dynamically from config, so the
          // options object is cast to satisfy the type. The cast is unchecked:
          // an unrecognised raw string is only validated by fastembed at runtime.
          const m = await FlagEmbedding.init({
            model: enumValue ?? this.modelName,
          } as { model: typeof EmbeddingModel.BGESmallENV15 });
          this.model = m;
          return m;
        } catch (err: unknown) {
          // Runs only after the first await, i.e. after initPromise was assigned.
          this.initPromise = null;
          throw err;
        }
      })();
    }
    return this.initPromise;
  }

  /**
   * Embed `texts` and return one `Float32Array` per input, in input order.
   *
   * Every text of a `"query"` request goes through `queryEmbed`, and every
   * text of a `"document"` request goes through `passageEmbed`, so the result
   * for a given text does not depend on how many texts share the call.
   *
   * @param texts Inputs to embed; an empty array yields an empty result
   *   without loading the model.
   * @param inputType Whether the texts are search queries or stored documents.
   */
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
    if (texts.length === 0) return [];

    const model = (await this.getModel()) as {
      queryEmbed(text: string): Promise<number[]>;
      passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
    };

    const results: Float32Array[] = [];

    if (inputType === "query") {
      // queryEmbed takes a single string, so embed queries one at a time.
      for (const text of texts) {
        results.push(new Float32Array(await model.queryEmbed(text)));
      }
      return results;
    }

    // passageEmbed returns an async generator of batches
    for await (const batch of model.passageEmbed(texts, this.maxBatchSize)) {
      for (const vec of batch) {
        results.push(new Float32Array(vec));
      }
    }
    return results;
  }
}
|
|
203
|
+
|
|
135
204
|
// ---------------------------------------------------------------------------
|
|
136
205
|
// Provider resolution
|
|
137
206
|
// ---------------------------------------------------------------------------
|
|
138
207
|
|
|
139
208
|
/** Default models per provider — used when config doesn't override. */
|
|
140
209
|
const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
|
|
210
|
+
local: { model: "BGESmallENV15", dimensions: 384 },
|
|
141
211
|
voyage: { model: "voyage-code-3", dimensions: 1024 },
|
|
142
212
|
openai: { model: "text-embedding-3-small", dimensions: 1536 },
|
|
143
213
|
};
|
|
@@ -165,23 +235,36 @@ function getProvider(): EmbeddingProvider | null {
|
|
|
165
235
|
}
|
|
166
236
|
|
|
167
237
|
const providerName = cfg.provider;
|
|
168
|
-
const
|
|
169
|
-
if (!apiKey) {
|
|
170
|
-
cachedProvider = null;
|
|
171
|
-
return null;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const defaults = PROVIDER_DEFAULTS[providerName];
|
|
175
|
-
const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
|
|
176
|
-
const dimensions = cfg.dimensions;
|
|
238
|
+
const model = cfg.model;
|
|
177
239
|
|
|
178
240
|
switch (providerName) {
|
|
179
|
-
case "
|
|
180
|
-
|
|
241
|
+
case "local": {
|
|
242
|
+
try {
|
|
243
|
+
cachedProvider = new LocalProvider(model);
|
|
244
|
+
} catch {
|
|
245
|
+
log.info("local embedding provider unavailable (fastembed not installed)");
|
|
246
|
+
cachedProvider = null;
|
|
247
|
+
}
|
|
181
248
|
break;
|
|
182
|
-
|
|
183
|
-
|
|
249
|
+
}
|
|
250
|
+
case "voyage": {
|
|
251
|
+
const apiKey = getProviderApiKey(providerName);
|
|
252
|
+
if (!apiKey) {
|
|
253
|
+
cachedProvider = null;
|
|
254
|
+
return null;
|
|
255
|
+
}
|
|
256
|
+
cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
|
|
184
257
|
break;
|
|
258
|
+
}
|
|
259
|
+
case "openai": {
|
|
260
|
+
const apiKey = getProviderApiKey(providerName);
|
|
261
|
+
if (!apiKey) {
|
|
262
|
+
cachedProvider = null;
|
|
263
|
+
return null;
|
|
264
|
+
}
|
|
265
|
+
cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
|
|
266
|
+
break;
|
|
267
|
+
}
|
|
185
268
|
default:
|
|
186
269
|
log.info(`unknown embedding provider: ${providerName}`);
|
|
187
270
|
cachedProvider = null;
|
|
@@ -433,13 +516,74 @@ export function checkConfigChange(): boolean {
|
|
|
433
516
|
return true;
|
|
434
517
|
}
|
|
435
518
|
|
|
519
|
+
// ---------------------------------------------------------------------------
|
|
520
|
+
// Startup backfill — single entry point for all hosts
|
|
521
|
+
// ---------------------------------------------------------------------------
|
|
522
|
+
|
|
523
|
+
/**
 * Run all embedding backfills and log coverage stats.
 *
 * This is the canonical entry point that every host adapter (OpenCode, Pi,
 * future ACP) should call once during init. It:
 *   1. Returns immediately when no embedding provider is available
 *   2. Backfills knowledge entries missing embeddings via `backfillEmbeddings()`
 *   3. Backfills distillations missing embeddings via
 *      `backfillDistillationEmbeddings()`
 *   4. Logs a one-line coverage summary through `log.info`
 *
 * Coverage is reported over knowledge rows with `confidence > 0.2` and over
 * non-archived distillations with non-empty observations. The "with embedding"
 * and "total" counts use the same predicates so the ratio is always
 * well-formed.
 *
 * Fire-and-forget: callers should `.catch()` — embedding failures must not
 * block plugin initialization.
 */
export async function runStartupBackfill(): Promise<void> {
  if (!isAvailable()) return;

  const knowledgeEmbedded = await backfillEmbeddings();
  const distillationEmbedded = await backfillDistillationEmbeddings();

  // Runs a `SELECT COUNT(*) as n ...` statement and returns n.
  const count = (sql: string): number =>
    (db().query(sql).get() as { n: number }).n;

  const kTotal = count(
    "SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2",
  );
  const kWithEmb = count(
    "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2",
  );
  const dTotal = count(
    "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''",
  );
  // Same row filter as dTotal, so the numerator is a subset of the denominator.
  const dWithEmb = count(
    "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0 AND observations != ''",
  );

  const parts: string[] = [];
  if (knowledgeEmbedded > 0 || distillationEmbedded > 0) {
    parts.push(`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`);
  }
  parts.push(
    `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`,
  );
  log.info(`embedding startup: ${parts.join("; ")}`);
}
|
|
579
|
+
|
|
436
580
|
// ---------------------------------------------------------------------------
|
|
437
581
|
// Backfill — knowledge
|
|
438
582
|
// ---------------------------------------------------------------------------
|
|
439
583
|
|
|
440
584
|
/**
|
|
441
585
|
* Embed all knowledge entries that are missing embeddings.
|
|
442
|
-
* Called
|
|
586
|
+
* Called by `runStartupBackfill()`.
|
|
443
587
|
* Also handles config changes: if provider/model/dimensions changed, clears
|
|
444
588
|
* stale embeddings first, then re-embeds all entries.
|
|
445
589
|
* Returns the number of entries embedded.
|