@loreai/core 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/dist/bun/agents-file.d.ts +29 -8
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +1 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/db.d.ts.map +1 -1
  6. package/dist/bun/distillation.d.ts +55 -0
  7. package/dist/bun/distillation.d.ts.map +1 -1
  8. package/dist/bun/embedding.d.ts +15 -1
  9. package/dist/bun/embedding.d.ts.map +1 -1
  10. package/dist/bun/gradient.d.ts +53 -5
  11. package/dist/bun/gradient.d.ts.map +1 -1
  12. package/dist/bun/index.d.ts +4 -4
  13. package/dist/bun/index.d.ts.map +1 -1
  14. package/dist/bun/index.js +799 -256
  15. package/dist/bun/index.js.map +4 -4
  16. package/dist/bun/pattern-extract.d.ts +36 -0
  17. package/dist/bun/pattern-extract.d.ts.map +1 -0
  18. package/dist/bun/recall.d.ts +1 -0
  19. package/dist/bun/recall.d.ts.map +1 -1
  20. package/dist/bun/search.d.ts +13 -1
  21. package/dist/bun/search.d.ts.map +1 -1
  22. package/dist/bun/temporal.d.ts +15 -0
  23. package/dist/bun/temporal.d.ts.map +1 -1
  24. package/dist/bun/types.d.ts +41 -1
  25. package/dist/bun/types.d.ts.map +1 -1
  26. package/dist/bun/worker-model.d.ts +22 -0
  27. package/dist/bun/worker-model.d.ts.map +1 -1
  28. package/dist/node/agents-file.d.ts +29 -8
  29. package/dist/node/agents-file.d.ts.map +1 -1
  30. package/dist/node/config.d.ts +1 -0
  31. package/dist/node/config.d.ts.map +1 -1
  32. package/dist/node/db.d.ts.map +1 -1
  33. package/dist/node/distillation.d.ts +55 -0
  34. package/dist/node/distillation.d.ts.map +1 -1
  35. package/dist/node/embedding.d.ts +15 -1
  36. package/dist/node/embedding.d.ts.map +1 -1
  37. package/dist/node/gradient.d.ts +53 -5
  38. package/dist/node/gradient.d.ts.map +1 -1
  39. package/dist/node/index.d.ts +4 -4
  40. package/dist/node/index.d.ts.map +1 -1
  41. package/dist/node/index.js +799 -256
  42. package/dist/node/index.js.map +4 -4
  43. package/dist/node/pattern-extract.d.ts +36 -0
  44. package/dist/node/pattern-extract.d.ts.map +1 -0
  45. package/dist/node/recall.d.ts +1 -0
  46. package/dist/node/recall.d.ts.map +1 -1
  47. package/dist/node/search.d.ts +13 -1
  48. package/dist/node/search.d.ts.map +1 -1
  49. package/dist/node/temporal.d.ts +15 -0
  50. package/dist/node/temporal.d.ts.map +1 -1
  51. package/dist/node/types.d.ts +41 -1
  52. package/dist/node/types.d.ts.map +1 -1
  53. package/dist/node/worker-model.d.ts +22 -0
  54. package/dist/node/worker-model.d.ts.map +1 -1
  55. package/dist/types/agents-file.d.ts +29 -8
  56. package/dist/types/agents-file.d.ts.map +1 -1
  57. package/dist/types/config.d.ts +1 -0
  58. package/dist/types/config.d.ts.map +1 -1
  59. package/dist/types/db.d.ts.map +1 -1
  60. package/dist/types/distillation.d.ts +55 -0
  61. package/dist/types/distillation.d.ts.map +1 -1
  62. package/dist/types/embedding.d.ts +15 -1
  63. package/dist/types/embedding.d.ts.map +1 -1
  64. package/dist/types/gradient.d.ts +53 -5
  65. package/dist/types/gradient.d.ts.map +1 -1
  66. package/dist/types/index.d.ts +4 -4
  67. package/dist/types/index.d.ts.map +1 -1
  68. package/dist/types/pattern-extract.d.ts +36 -0
  69. package/dist/types/pattern-extract.d.ts.map +1 -0
  70. package/dist/types/recall.d.ts +1 -0
  71. package/dist/types/recall.d.ts.map +1 -1
  72. package/dist/types/search.d.ts +13 -1
  73. package/dist/types/search.d.ts.map +1 -1
  74. package/dist/types/temporal.d.ts +15 -0
  75. package/dist/types/temporal.d.ts.map +1 -1
  76. package/dist/types/types.d.ts +41 -1
  77. package/dist/types/types.d.ts.map +1 -1
  78. package/dist/types/worker-model.d.ts +22 -0
  79. package/dist/types/worker-model.d.ts.map +1 -1
  80. package/package.json +3 -2
  81. package/src/agents-file.ts +111 -28
  82. package/src/config.ts +25 -18
  83. package/src/curator.ts +2 -2
  84. package/src/db.ts +83 -4
  85. package/src/distillation.ts +270 -27
  86. package/src/embedding.ts +158 -14
  87. package/src/gradient.ts +398 -227
  88. package/src/index.ts +13 -5
  89. package/src/pattern-extract.ts +108 -0
  90. package/src/recall.ts +142 -6
  91. package/src/search.ts +37 -1
  92. package/src/temporal.ts +39 -0
  93. package/src/types.ts +41 -1
  94. package/src/worker-model.ts +142 -5
package/src/embedding.ts CHANGED
@@ -132,12 +132,82 @@ class OpenAIProvider implements EmbeddingProvider {
132
132
  }
133
133
  }
134
134
 
135
+ // ---------------------------------------------------------------------------
136
+ // Local provider (fastembed + ONNX Runtime)
137
+ // ---------------------------------------------------------------------------
138
+
139
+ /**
140
+ * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
141
+ *
142
+ * No API key required — runs entirely on-device via ONNX Runtime.
143
+ * Model files are downloaded on first use (~33MB) and cached in
144
+ * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
145
+ *
146
+ * Uses dynamic import so the module is only loaded when the "local"
147
+ * provider is actually selected — avoids startup cost and allows
148
+ * graceful fallback if fastembed is not installed.
149
+ */
150
+ class LocalProvider implements EmbeddingProvider {
151
+ readonly maxBatchSize = 256;
152
+ private model: unknown | null = null;
153
+ private initPromise: Promise<unknown> | null = null;
154
+ private modelName: string;
155
+
156
+ constructor(modelName: string) {
157
+ this.modelName = modelName;
158
+ }
159
+
160
+ private async getModel(): Promise<unknown> {
161
+ if (this.model) return this.model;
162
+ if (!this.initPromise) {
163
+ this.initPromise = (async () => {
164
+ const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
165
+ // Map config model string to EmbeddingModel enum value.
166
+ // If the configured model matches an enum key, use it; otherwise try
167
+ // the raw string as a model name (CUSTOM model support in fastembed).
168
+ const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
169
+ // fastembed's init() has overloaded signatures expecting specific enum
170
+ // members, but we resolve the model dynamically from config. The enum
171
+ // lookup guarantees a valid value at runtime; cast to satisfy the type.
172
+ const m = await FlagEmbedding.init({
173
+ model: enumValue ?? this.modelName,
174
+ } as { model: typeof EmbeddingModel.BGESmallENV15 });
175
+ this.model = m;
176
+ return m;
177
+ })();
178
+ }
179
+ return this.initPromise;
180
+ }
181
+
182
+ async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
183
+ const model = (await this.getModel()) as {
184
+ queryEmbed(text: string): Promise<number[]>;
185
+ passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
186
+ };
187
+
188
+ if (inputType === "query" && texts.length === 1) {
189
+ const vec = await model.queryEmbed(texts[0]);
190
+ return [new Float32Array(vec)];
191
+ }
192
+
193
+ // passageEmbed returns an async generator of batches
194
+ const results: Float32Array[] = [];
195
+ for await (const batch of model.passageEmbed(texts)) {
196
+ for (const vec of batch) {
197
+ results.push(new Float32Array(vec));
198
+ }
199
+ }
200
+ return results;
201
+ }
202
+ }
203
+
135
204
  // ---------------------------------------------------------------------------
136
205
  // Provider resolution
137
206
  // ---------------------------------------------------------------------------
138
207
 
139
208
  /** Default models per provider — used when config doesn't override. */
140
209
  const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
210
+ local: { model: "BGESmallENV15", dimensions: 384 },
141
211
  voyage: { model: "voyage-code-3", dimensions: 1024 },
142
212
  openai: { model: "text-embedding-3-small", dimensions: 1536 },
143
213
  };
@@ -165,23 +235,36 @@ function getProvider(): EmbeddingProvider | null {
165
235
  }
166
236
 
167
237
  const providerName = cfg.provider;
168
- const apiKey = getProviderApiKey(providerName);
169
- if (!apiKey) {
170
- cachedProvider = null;
171
- return null;
172
- }
173
-
174
- const defaults = PROVIDER_DEFAULTS[providerName];
175
- const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
176
- const dimensions = cfg.dimensions;
238
+ const model = cfg.model;
177
239
 
178
240
  switch (providerName) {
179
- case "voyage":
180
- cachedProvider = new VoyageProvider(apiKey, model, dimensions);
241
+ case "local": {
242
+ try {
243
+ cachedProvider = new LocalProvider(model);
244
+ } catch {
245
+ log.info("local embedding provider unavailable (fastembed not installed)");
246
+ cachedProvider = null;
247
+ }
181
248
  break;
182
- case "openai":
183
- cachedProvider = new OpenAIProvider(apiKey, model, dimensions);
249
+ }
250
+ case "voyage": {
251
+ const apiKey = getProviderApiKey(providerName);
252
+ if (!apiKey) {
253
+ cachedProvider = null;
254
+ return null;
255
+ }
256
+ cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
184
257
  break;
258
+ }
259
+ case "openai": {
260
+ const apiKey = getProviderApiKey(providerName);
261
+ if (!apiKey) {
262
+ cachedProvider = null;
263
+ return null;
264
+ }
265
+ cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
266
+ break;
267
+ }
185
268
  default:
186
269
  log.info(`unknown embedding provider: ${providerName}`);
187
270
  cachedProvider = null;
@@ -433,13 +516,74 @@ export function checkConfigChange(): boolean {
433
516
  return true;
434
517
  }
435
518
 
519
+ // ---------------------------------------------------------------------------
520
+ // Startup backfill — single entry point for all hosts
521
+ // ---------------------------------------------------------------------------
522
+
523
+ /**
524
+ * Run all embedding backfills and log coverage stats.
525
+ *
526
+ * This is the canonical entry point that every host adapter (OpenCode, Pi,
527
+ * future ACP) should call once during init. It:
528
+ * 1. Detects config changes (provider swap) and clears stale embeddings
529
+ * 2. Backfills knowledge entries missing embeddings
530
+ * 3. Backfills non-archived distillations missing embeddings
531
+ * 4. Logs a one-line coverage summary to stderr (always visible, not gated)
532
+ *
533
+ * Fire-and-forget: callers should `.catch()` — embedding failures must not
534
+ * block plugin initialization.
535
+ */
536
+ export async function runStartupBackfill(): Promise<void> {
537
+ if (!isAvailable()) return;
538
+
539
+ const knowledgeEmbedded = await backfillEmbeddings();
540
+ const distillationEmbedded = await backfillDistillationEmbeddings();
541
+
542
+ // Coverage stats — always log to stderr so the problem is visible.
543
+ const kTotal = (
544
+ db()
545
+ .query("SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2")
546
+ .get() as { n: number }
547
+ ).n;
548
+ const kWithEmb = (
549
+ db()
550
+ .query(
551
+ "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2",
552
+ )
553
+ .get() as { n: number }
554
+ ).n;
555
+ const dTotal = (
556
+ db()
557
+ .query(
558
+ "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''",
559
+ )
560
+ .get() as { n: number }
561
+ ).n;
562
+ const dWithEmb = (
563
+ db()
564
+ .query(
565
+ "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0",
566
+ )
567
+ .get() as { n: number }
568
+ ).n;
569
+
570
+ const parts: string[] = [];
571
+ if (knowledgeEmbedded > 0 || distillationEmbedded > 0) {
572
+ parts.push(`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`);
573
+ }
574
+ parts.push(
575
+ `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`,
576
+ );
577
+ log.info(`embedding startup: ${parts.join("; ")}`);
578
+ }
579
+
436
580
  // ---------------------------------------------------------------------------
437
581
  // Backfill — knowledge
438
582
  // ---------------------------------------------------------------------------
439
583
 
440
584
  /**
441
585
  * Embed all knowledge entries that are missing embeddings.
442
- * Called on startup when embeddings are first enabled.
586
+ * Called by `runStartupBackfill()`.
443
587
  * Also handles config changes: if provider/model/dimensions changed, clears
444
588
  * stale embeddings first, then re-embeds all entries.
445
589
  * Returns the number of entries embedded.