@loreai/core 0.12.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/bun/agents-file.d.ts +29 -8
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +1 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/db.d.ts.map +1 -1
  6. package/dist/bun/distillation.d.ts +29 -0
  7. package/dist/bun/distillation.d.ts.map +1 -1
  8. package/dist/bun/embedding.d.ts +15 -1
  9. package/dist/bun/embedding.d.ts.map +1 -1
  10. package/dist/bun/gradient.d.ts +53 -5
  11. package/dist/bun/gradient.d.ts.map +1 -1
  12. package/dist/bun/index.d.ts +4 -4
  13. package/dist/bun/index.d.ts.map +1 -1
  14. package/dist/bun/index.js +696 -243
  15. package/dist/bun/index.js.map +4 -4
  16. package/dist/bun/pattern-extract.d.ts +36 -0
  17. package/dist/bun/pattern-extract.d.ts.map +1 -0
  18. package/dist/bun/recall.d.ts +1 -0
  19. package/dist/bun/recall.d.ts.map +1 -1
  20. package/dist/bun/search.d.ts +13 -1
  21. package/dist/bun/search.d.ts.map +1 -1
  22. package/dist/bun/types.d.ts +41 -1
  23. package/dist/bun/types.d.ts.map +1 -1
  24. package/dist/bun/worker-model.d.ts +22 -0
  25. package/dist/bun/worker-model.d.ts.map +1 -1
  26. package/dist/node/agents-file.d.ts +29 -8
  27. package/dist/node/agents-file.d.ts.map +1 -1
  28. package/dist/node/config.d.ts +1 -0
  29. package/dist/node/config.d.ts.map +1 -1
  30. package/dist/node/db.d.ts.map +1 -1
  31. package/dist/node/distillation.d.ts +29 -0
  32. package/dist/node/distillation.d.ts.map +1 -1
  33. package/dist/node/embedding.d.ts +15 -1
  34. package/dist/node/embedding.d.ts.map +1 -1
  35. package/dist/node/gradient.d.ts +53 -5
  36. package/dist/node/gradient.d.ts.map +1 -1
  37. package/dist/node/index.d.ts +4 -4
  38. package/dist/node/index.d.ts.map +1 -1
  39. package/dist/node/index.js +696 -243
  40. package/dist/node/index.js.map +4 -4
  41. package/dist/node/pattern-extract.d.ts +36 -0
  42. package/dist/node/pattern-extract.d.ts.map +1 -0
  43. package/dist/node/recall.d.ts +1 -0
  44. package/dist/node/recall.d.ts.map +1 -1
  45. package/dist/node/search.d.ts +13 -1
  46. package/dist/node/search.d.ts.map +1 -1
  47. package/dist/node/types.d.ts +41 -1
  48. package/dist/node/types.d.ts.map +1 -1
  49. package/dist/node/worker-model.d.ts +22 -0
  50. package/dist/node/worker-model.d.ts.map +1 -1
  51. package/dist/types/agents-file.d.ts +29 -8
  52. package/dist/types/agents-file.d.ts.map +1 -1
  53. package/dist/types/config.d.ts +1 -0
  54. package/dist/types/config.d.ts.map +1 -1
  55. package/dist/types/db.d.ts.map +1 -1
  56. package/dist/types/distillation.d.ts +29 -0
  57. package/dist/types/distillation.d.ts.map +1 -1
  58. package/dist/types/embedding.d.ts +15 -1
  59. package/dist/types/embedding.d.ts.map +1 -1
  60. package/dist/types/gradient.d.ts +53 -5
  61. package/dist/types/gradient.d.ts.map +1 -1
  62. package/dist/types/index.d.ts +4 -4
  63. package/dist/types/index.d.ts.map +1 -1
  64. package/dist/types/pattern-extract.d.ts +36 -0
  65. package/dist/types/pattern-extract.d.ts.map +1 -0
  66. package/dist/types/recall.d.ts +1 -0
  67. package/dist/types/recall.d.ts.map +1 -1
  68. package/dist/types/search.d.ts +13 -1
  69. package/dist/types/search.d.ts.map +1 -1
  70. package/dist/types/types.d.ts +41 -1
  71. package/dist/types/types.d.ts.map +1 -1
  72. package/dist/types/worker-model.d.ts +22 -0
  73. package/dist/types/worker-model.d.ts.map +1 -1
  74. package/package.json +3 -2
  75. package/src/agents-file.ts +111 -28
  76. package/src/config.ts +25 -18
  77. package/src/curator.ts +2 -2
  78. package/src/db.ts +19 -2
  79. package/src/distillation.ts +152 -15
  80. package/src/embedding.ts +158 -14
  81. package/src/gradient.ts +398 -227
  82. package/src/index.ts +13 -5
  83. package/src/pattern-extract.ts +108 -0
  84. package/src/recall.ts +124 -6
  85. package/src/search.ts +37 -1
  86. package/src/types.ts +41 -1
  87. package/src/worker-model.ts +142 -5
@@ -3,7 +3,9 @@ import { config } from "./config";
3
3
  import * as temporal from "./temporal";
4
4
  import { CHUNK_TERMINATOR } from "./temporal";
5
5
  import * as embedding from "./embedding";
6
+ import * as ltm from "./ltm";
6
7
  import * as log from "./log";
8
+ import { extractPatterns } from "./pattern-extract";
7
9
  import {
8
10
  DISTILLATION_SYSTEM,
9
11
  distillationUser,
@@ -328,6 +330,10 @@ export type Distillation = {
328
330
  generation: number;
329
331
  token_count: number;
330
332
  created_at: number;
333
+ /** k/√N compression ratio. NULL for pre-v12 rows or meta-distillations. */
334
+ r_compression: number | null;
335
+ /** Temporal clustering [0,1]. NULL for pre-v12 rows or meta-distillations. */
336
+ c_norm: number | null;
331
337
  };
332
338
 
333
339
  /**
@@ -351,8 +357,8 @@ export function loadForSession(
351
357
  ): Distillation[] {
352
358
  const pid = ensureProject(projectPath);
353
359
  const sql = includeArchived
354
- ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
355
- : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
360
+ ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
361
+ : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
356
362
  const rows = db()
357
363
  .query(sql)
358
364
  .all(pid, sessionID) as Array<{
@@ -364,6 +370,8 @@ export function loadForSession(
364
370
  generation: number;
365
371
  token_count: number;
366
372
  created_at: number;
373
+ r_compression: number | null;
374
+ c_norm: number | null;
367
375
  }>;
368
376
  return rows.map((r) => ({
369
377
  ...r,
@@ -377,6 +385,8 @@ function storeDistillation(input: {
377
385
  observations: string;
378
386
  sourceIDs: string[];
379
387
  generation: number;
388
+ rCompression?: number;
389
+ cNorm?: number;
380
390
  }): string {
381
391
  const pid = ensureProject(input.projectPath);
382
392
  const id = crypto.randomUUID();
@@ -384,8 +394,8 @@ function storeDistillation(input: {
384
394
  const tokens = Math.ceil(input.observations.length / 3);
385
395
  db()
386
396
  .query(
387
- `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
388
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
397
+ `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
398
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
389
399
  )
390
400
  .run(
391
401
  id,
@@ -398,6 +408,8 @@ function storeDistillation(input: {
398
408
  input.generation,
399
409
  tokens,
400
410
  Date.now(),
411
+ input.rCompression ?? null,
412
+ input.cNorm ?? null,
401
413
  );
402
414
  return id;
403
415
  }
@@ -420,7 +432,7 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
420
432
  const pid = ensureProject(projectPath);
421
433
  const rows = db()
422
434
  .query(
423
- "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
435
+ "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
424
436
  )
425
437
  .all(pid, sessionID) as Array<{
426
438
  id: string;
@@ -431,6 +443,8 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
431
443
  generation: number;
432
444
  token_count: number;
433
445
  created_at: number;
446
+ r_compression: number | null;
447
+ c_norm: number | null;
434
448
  }>;
435
449
  return rows.map((r) => ({
436
450
  ...r,
@@ -514,6 +528,17 @@ export async function run(input: {
514
528
  model?: { providerID: string; modelID: string };
515
529
  /** Skip minMessages threshold check — distill whatever is pending */
516
530
  force?: boolean;
531
+ /** Skip meta-distillation even when gen-0 count exceeds the threshold.
532
+ * Used when the upstream prompt cache is likely still warm — meta-distillation
533
+ * rewrites distillation row IDs, which invalidates the distilled prefix cache
534
+ * and causes a cache bust on the next turn. Callers should set this to true
535
+ * when `Date.now() - getLastTurnAt(sessionID) < cacheTTL`. */
536
+ skipMeta?: boolean;
537
+ /** When true, all LLM calls in this run are marked urgent and bypass the
538
+ * batch queue (if one is active). Use for compaction and overflow recovery
539
+ * where the caller is blocking on the result. Background/idle distillation
540
+ * should leave this false to benefit from batch API 50% cost savings. */
541
+ urgent?: boolean;
517
542
  }): Promise<{ rounds: number; distilled: number }> {
518
543
  // Reset orphaned messages (marked distilled by a deleted/migrated distillation)
519
544
  const orphans = resetOrphans(input.projectPath, input.sessionID);
@@ -547,6 +572,7 @@ export async function run(input: {
547
572
  sessionID: input.sessionID,
548
573
  messages: segment,
549
574
  model: input.model,
575
+ urgent: input.urgent,
550
576
  });
551
577
  if (result) {
552
578
  distilled += segment.length;
@@ -555,8 +581,11 @@ export async function run(input: {
555
581
  }
556
582
  }
557
583
 
558
- // Check if meta-distillation is needed
584
+ // Check if meta-distillation is needed (skip when cache is warm to avoid
585
+ // prefix cache invalidation — row IDs change after meta-distill, busting
586
+ // the prompt cache on the next turn).
559
587
  if (
588
+ !input.skipMeta &&
560
589
  gen0Count(input.projectPath, input.sessionID) >=
561
590
  cfg.distillation.metaThreshold
562
591
  ) {
@@ -565,6 +594,7 @@ export async function run(input: {
565
594
  projectPath: input.projectPath,
566
595
  sessionID: input.sessionID,
567
596
  model: input.model,
597
+ urgent: input.urgent,
568
598
  });
569
599
  rounds++;
570
600
  }
@@ -582,6 +612,7 @@ async function distillSegment(input: {
582
612
  sessionID: string;
583
613
  messages: TemporalMessage[];
584
614
  model?: { providerID: string; modelID: string };
615
+ urgent?: boolean;
585
616
  }): Promise<DistillationResult | null> {
586
617
  const prior = latestObservations(input.projectPath, input.sessionID);
587
618
  const text = messagesToText(input.messages);
@@ -604,29 +635,30 @@ async function distillSegment(input: {
604
635
  const responseText = await input.llm.prompt(
605
636
  DISTILLATION_SYSTEM,
606
637
  userContent,
607
- { model, workerID: "lore-distill" },
638
+ { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
608
639
  );
609
640
  if (!responseText) return null;
610
641
 
611
642
  const result = parseDistillationResult(responseText);
612
643
  if (!result) return null;
613
644
 
645
+ // Compute context health metrics before storing.
646
+ const distilledTokens = Math.ceil(result.observations.length / 3);
647
+ const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
648
+ const rComp = compressionRatio(distilledTokens, sourceTokens);
649
+ const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
650
+
614
651
  const distillId = storeDistillation({
615
652
  projectPath: input.projectPath,
616
653
  sessionID: input.sessionID,
617
654
  observations: result.observations,
618
655
  sourceIDs: input.messages.map((m) => m.id),
619
656
  generation: 0,
657
+ rCompression: rComp,
658
+ cNorm,
620
659
  });
621
660
  temporal.markDistilled(input.messages.map((m) => m.id));
622
661
 
623
- // Diagnostic: log compression health and temporal clustering metrics.
624
- // R_compression (k/√N): < 1.0 signals likely lossy distillation.
625
- // C_norm: 0 = uniform timestamps, 1 = dominated by distant past.
626
- const distilledTokens = Math.ceil(result.observations.length / 3);
627
- const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
628
- const rComp = compressionRatio(distilledTokens, sourceTokens);
629
- const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
630
662
  log.info(
631
663
  `distill segment: ${input.messages.length} msgs, ` +
632
664
  `${sourceTokens}→${distilledTokens} tokens, ` +
@@ -638,6 +670,24 @@ async function distillSegment(input: {
638
670
  embedding.embedDistillation(distillId, result.observations);
639
671
  }
640
672
 
673
+ // Fire-and-forget: extract decision/preference patterns → knowledge entries
674
+ if (config().knowledge.enabled) {
675
+ for (const pat of extractPatterns(result.observations)) {
676
+ try {
677
+ ltm.create({
678
+ projectPath: input.projectPath,
679
+ category: pat.category,
680
+ title: pat.title,
681
+ content: pat.content,
682
+ session: input.sessionID,
683
+ scope: "project",
684
+ });
685
+ } catch {
686
+ // Dedup guard in ltm.create() handles duplicates — swallow errors
687
+ }
688
+ }
689
+ }
690
+
641
691
  return result;
642
692
  }
643
693
 
@@ -654,6 +704,7 @@ export async function metaDistill(input: {
654
704
  projectPath: string;
655
705
  sessionID: string;
656
706
  model?: { providerID: string; modelID: string };
707
+ urgent?: boolean;
657
708
  }): Promise<DistillationResult | null> {
658
709
  const existing = loadGen0(input.projectPath, input.sessionID);
659
710
 
@@ -681,7 +732,7 @@ export async function metaDistill(input: {
681
732
  const responseText = await input.llm.prompt(
682
733
  RECURSIVE_SYSTEM,
683
734
  userContent,
684
- { model, workerID: "lore-distill" },
735
+ { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
685
736
  );
686
737
  if (!responseText) return null;
687
738
 
@@ -732,5 +783,91 @@ export async function metaDistill(input: {
732
783
  embedding.embedDistillation(metaId, result.observations);
733
784
  }
734
785
 
786
+ // Fire-and-forget: extract decision/preference patterns → knowledge entries
787
+ if (config().knowledge.enabled) {
788
+ for (const pat of extractPatterns(result.observations)) {
789
+ try {
790
+ ltm.create({
791
+ projectPath: input.projectPath,
792
+ category: pat.category,
793
+ title: pat.title,
794
+ content: pat.content,
795
+ session: input.sessionID,
796
+ scope: "project",
797
+ });
798
+ } catch {
799
+ // Dedup guard in ltm.create() handles duplicates — swallow errors
800
+ }
801
+ }
802
+ }
803
+
735
804
  return result;
736
805
  }
806
+
807
+ // ---------------------------------------------------------------------------
808
+ // Retroactive metric backfill
809
+ // ---------------------------------------------------------------------------
810
+
811
+ /**
812
+ * Backfill `r_compression` and `c_norm` for distillations that were created
813
+ * before schema v12 (or before PR #113 added the computation).
814
+ *
815
+ * For each distillation with NULL metrics, loads source temporal messages via
816
+ * `source_ids`, computes `compressionRatio()` and `temporalCnorm()`, and
817
+ * writes the values back. Skips rows where source messages have been pruned
818
+ * or source_ids is empty.
819
+ *
820
+ * Designed to run once at startup — idempotent (only touches NULL rows).
821
+ * Returns the number of rows updated.
822
+ */
823
+ export function backfillMetrics(): number {
824
+ const rows = db()
825
+ .query(
826
+ "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL",
827
+ )
828
+ .all() as Array<{
829
+ id: string;
830
+ source_ids: string;
831
+ token_count: number;
832
+ }>;
833
+
834
+ if (!rows.length) return 0;
835
+
836
+ const update = db().prepare(
837
+ "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?",
838
+ );
839
+
840
+ let updated = 0;
841
+
842
+ for (const row of rows) {
843
+ const sourceIds = parseSourceIds(row.source_ids);
844
+ if (!sourceIds.length) continue;
845
+
846
+ // Load source temporal messages — they may have been pruned.
847
+ const placeholders = sourceIds.map(() => "?").join(",");
848
+ const sources = db()
849
+ .query(
850
+ `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`,
851
+ )
852
+ .all(...sourceIds) as Array<{ tokens: number; created_at: number }>;
853
+
854
+ if (!sources.length) continue;
855
+
856
+ const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
857
+ const timestamps = sources.map((s) => s.created_at);
858
+
859
+ const rComp = compressionRatio(row.token_count, sourceTokens);
860
+ const cNorm = temporal.temporalCnorm(timestamps);
861
+
862
+ update.run(rComp, cNorm, row.id);
863
+ updated++;
864
+ }
865
+
866
+ if (updated > 0) {
867
+ log.info(
868
+ `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped — missing sources)`,
869
+ );
870
+ }
871
+
872
+ return updated;
873
+ }
package/src/embedding.ts CHANGED
@@ -132,12 +132,82 @@ class OpenAIProvider implements EmbeddingProvider {
132
132
  }
133
133
  }
134
134
 
135
+ // ---------------------------------------------------------------------------
136
+ // Local provider (fastembed + ONNX Runtime)
137
+ // ---------------------------------------------------------------------------
138
+
139
+ /**
140
+ * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
141
+ *
142
+ * No API key required — runs entirely on-device via ONNX Runtime.
143
+ * Model files are downloaded on first use (~33MB) and cached in
144
+ * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
145
+ *
146
+ * Uses dynamic import so the module is only loaded when the "local"
147
+ * provider is actually selected — avoids startup cost and allows
148
+ * graceful fallback if fastembed is not installed.
149
+ */
150
+ class LocalProvider implements EmbeddingProvider {
151
+ readonly maxBatchSize = 256;
152
+ private model: unknown | null = null;
153
+ private initPromise: Promise<unknown> | null = null;
154
+ private modelName: string;
155
+
156
+ constructor(modelName: string) {
157
+ this.modelName = modelName;
158
+ }
159
+
160
+ private async getModel(): Promise<unknown> {
161
+ if (this.model) return this.model;
162
+ if (!this.initPromise) {
163
+ this.initPromise = (async () => {
164
+ const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
165
+ // Map config model string to EmbeddingModel enum value.
166
+ // If the configured model matches an enum key, use it; otherwise try
167
+ // the raw string as a model name (CUSTOM model support in fastembed).
168
+ const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
169
+ // fastembed's init() has overloaded signatures expecting specific enum
170
+ // members, but we resolve the model dynamically from config. The enum
171
+ // lookup guarantees a valid value at runtime; cast to satisfy the type.
172
+ const m = await FlagEmbedding.init({
173
+ model: enumValue ?? this.modelName,
174
+ } as { model: typeof EmbeddingModel.BGESmallENV15 });
175
+ this.model = m;
176
+ return m;
177
+ })();
178
+ }
179
+ return this.initPromise;
180
+ }
181
+
182
+ async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
183
+ const model = (await this.getModel()) as {
184
+ queryEmbed(text: string): Promise<number[]>;
185
+ passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
186
+ };
187
+
188
+ if (inputType === "query" && texts.length === 1) {
189
+ const vec = await model.queryEmbed(texts[0]);
190
+ return [new Float32Array(vec)];
191
+ }
192
+
193
+ // passageEmbed returns an async generator of batches
194
+ const results: Float32Array[] = [];
195
+ for await (const batch of model.passageEmbed(texts)) {
196
+ for (const vec of batch) {
197
+ results.push(new Float32Array(vec));
198
+ }
199
+ }
200
+ return results;
201
+ }
202
+ }
203
+
135
204
  // ---------------------------------------------------------------------------
136
205
  // Provider resolution
137
206
  // ---------------------------------------------------------------------------
138
207
 
139
208
  /** Default models per provider — used when config doesn't override. */
140
209
  const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
210
+ local: { model: "BGESmallENV15", dimensions: 384 },
141
211
  voyage: { model: "voyage-code-3", dimensions: 1024 },
142
212
  openai: { model: "text-embedding-3-small", dimensions: 1536 },
143
213
  };
@@ -165,23 +235,36 @@ function getProvider(): EmbeddingProvider | null {
165
235
  }
166
236
 
167
237
  const providerName = cfg.provider;
168
- const apiKey = getProviderApiKey(providerName);
169
- if (!apiKey) {
170
- cachedProvider = null;
171
- return null;
172
- }
173
-
174
- const defaults = PROVIDER_DEFAULTS[providerName];
175
- const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
176
- const dimensions = cfg.dimensions;
238
+ const model = cfg.model;
177
239
 
178
240
  switch (providerName) {
179
- case "voyage":
180
- cachedProvider = new VoyageProvider(apiKey, model, dimensions);
241
+ case "local": {
242
+ try {
243
+ cachedProvider = new LocalProvider(model);
244
+ } catch {
245
+ log.info("local embedding provider unavailable (fastembed not installed)");
246
+ cachedProvider = null;
247
+ }
181
248
  break;
182
- case "openai":
183
- cachedProvider = new OpenAIProvider(apiKey, model, dimensions);
249
+ }
250
+ case "voyage": {
251
+ const apiKey = getProviderApiKey(providerName);
252
+ if (!apiKey) {
253
+ cachedProvider = null;
254
+ return null;
255
+ }
256
+ cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
184
257
  break;
258
+ }
259
+ case "openai": {
260
+ const apiKey = getProviderApiKey(providerName);
261
+ if (!apiKey) {
262
+ cachedProvider = null;
263
+ return null;
264
+ }
265
+ cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
266
+ break;
267
+ }
185
268
  default:
186
269
  log.info(`unknown embedding provider: ${providerName}`);
187
270
  cachedProvider = null;
@@ -433,13 +516,74 @@ export function checkConfigChange(): boolean {
433
516
  return true;
434
517
  }
435
518
 
519
+ // ---------------------------------------------------------------------------
520
+ // Startup backfill — single entry point for all hosts
521
+ // ---------------------------------------------------------------------------
522
+
523
+ /**
524
+ * Run all embedding backfills and log coverage stats.
525
+ *
526
+ * This is the canonical entry point that every host adapter (OpenCode, Pi,
527
+ * future ACP) should call once during init. It:
528
+ * 1. Detects config changes (provider swap) and clears stale embeddings
529
+ * 2. Backfills knowledge entries missing embeddings
530
+ * 3. Backfills non-archived distillations missing embeddings
531
+ * 4. Logs a one-line coverage summary to stderr (always visible, not gated)
532
+ *
533
+ * Fire-and-forget: callers should `.catch()` — embedding failures must not
534
+ * block plugin initialization.
535
+ */
536
+ export async function runStartupBackfill(): Promise<void> {
537
+ if (!isAvailable()) return;
538
+
539
+ const knowledgeEmbedded = await backfillEmbeddings();
540
+ const distillationEmbedded = await backfillDistillationEmbeddings();
541
+
542
+ // Coverage stats — always log to stderr so the problem is visible.
543
+ const kTotal = (
544
+ db()
545
+ .query("SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2")
546
+ .get() as { n: number }
547
+ ).n;
548
+ const kWithEmb = (
549
+ db()
550
+ .query(
551
+ "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2",
552
+ )
553
+ .get() as { n: number }
554
+ ).n;
555
+ const dTotal = (
556
+ db()
557
+ .query(
558
+ "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''",
559
+ )
560
+ .get() as { n: number }
561
+ ).n;
562
+ const dWithEmb = (
563
+ db()
564
+ .query(
565
+ "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0",
566
+ )
567
+ .get() as { n: number }
568
+ ).n;
569
+
570
+ const parts: string[] = [];
571
+ if (knowledgeEmbedded > 0 || distillationEmbedded > 0) {
572
+ parts.push(`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`);
573
+ }
574
+ parts.push(
575
+ `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`,
576
+ );
577
+ log.info(`embedding startup: ${parts.join("; ")}`);
578
+ }
579
+
436
580
  // ---------------------------------------------------------------------------
437
581
  // Backfill — knowledge
438
582
  // ---------------------------------------------------------------------------
439
583
 
440
584
  /**
441
585
  * Embed all knowledge entries that are missing embeddings.
442
- * Called on startup when embeddings are first enabled.
586
+ * Called by `runStartupBackfill()`.
443
587
  * Also handles config changes: if provider/model/dimensions changed, clears
444
588
  * stale embeddings first, then re-embeds all entries.
445
589
  * Returns the number of entries embedded.