@loreai/core 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +29 -8
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +1 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +55 -0
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +15 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +53 -5
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -4
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +799 -256
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/pattern-extract.d.ts +36 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +1 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +13 -1
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +15 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +41 -1
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +22 -0
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +29 -8
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +1 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +55 -0
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +15 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +53 -5
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -4
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +799 -256
- package/dist/node/index.js.map +4 -4
- package/dist/node/pattern-extract.d.ts +36 -0
- package/dist/node/pattern-extract.d.ts.map +1 -0
- package/dist/node/recall.d.ts +1 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +13 -1
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +15 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +41 -1
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +22 -0
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +29 -8
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +55 -0
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +15 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +53 -5
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +36 -0
- package/dist/types/pattern-extract.d.ts.map +1 -0
- package/dist/types/recall.d.ts +1 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +13 -1
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +15 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -1
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +22 -0
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/agents-file.ts +111 -28
- package/src/config.ts +25 -18
- package/src/curator.ts +2 -2
- package/src/db.ts +83 -4
- package/src/distillation.ts +270 -27
- package/src/embedding.ts +158 -14
- package/src/gradient.ts +398 -227
- package/src/index.ts +13 -5
- package/src/pattern-extract.ts +108 -0
- package/src/recall.ts +142 -6
- package/src/search.ts +37 -1
- package/src/temporal.ts +39 -0
- package/src/types.ts +41 -1
- package/src/worker-model.ts +142 -5
package/src/embedding.ts
CHANGED
|
@@ -132,12 +132,82 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Local provider (fastembed + ONNX Runtime)
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
/**
 * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
 *
 * No API key required — runs entirely on-device via ONNX Runtime.
 * Model files are downloaded on first use (~33MB) and cached in
 * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
 *
 * Uses dynamic import so the module is only loaded when the "local"
 * provider is actually selected — avoids startup cost and means a missing
 * fastembed install only surfaces when an embedding is first requested
 * (as a rejected promise from `embed()`), not at construction time.
 *
 * Model initialization is lazy and shared: concurrent callers await the same
 * in-flight promise. A failed initialization is NOT cached, so a later call
 * can retry (e.g. after a transient download failure).
 */
class LocalProvider implements EmbeddingProvider {
  readonly maxBatchSize = 256;
  private model: unknown | null = null;
  private initPromise: Promise<unknown> | null = null;
  private modelName: string;

  /**
   * @param modelName Either a key of fastembed's `EmbeddingModel` enum
   *   (e.g. "BGESmallENV15") or a raw model name passed through as-is.
   */
  constructor(modelName: string) {
    this.modelName = modelName;
  }

  /**
   * Load fastembed and initialize the model on first use.
   *
   * @returns The initialized fastembed model instance.
   * @throws Whatever the dynamic import or `FlagEmbedding.init()` rejects
   *   with; the cached promise is cleared first so the next call retries.
   */
  private async getModel(): Promise<unknown> {
    if (this.model) return this.model;
    if (!this.initPromise) {
      this.initPromise = (async () => {
        try {
          const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
          // Map config model string to EmbeddingModel enum value.
          // If the configured model matches an enum key, use it; otherwise
          // pass the raw string through as the model name.
          const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
          // fastembed's init() has overloaded signatures expecting specific
          // enum members, but we resolve the model dynamically from config,
          // so the options object is cast to satisfy the type. When the
          // lookup misses, validity of the raw string is left to fastembed.
          const m = await FlagEmbedding.init({
            model: enumValue ?? this.modelName,
          } as { model: typeof EmbeddingModel.BGESmallENV15 });
          this.model = m;
          return m;
        } catch (err) {
          // Do not keep a rejected promise around: without this reset a
          // single failed init would disable the provider until restart.
          this.initPromise = null;
          throw err;
        }
      })();
    }
    return this.initPromise;
  }

  /**
   * Embed a list of texts.
   *
   * @param texts Texts to embed; an empty list yields an empty result
   *   without loading the model.
   * @param inputType "query" with exactly one text uses `queryEmbed`;
   *   every other combination goes through `passageEmbed`.
   * @returns One `Float32Array` per input text, in the order produced by
   *   the model.
   */
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
    if (texts.length === 0) return [];

    const model = (await this.getModel()) as {
      queryEmbed(text: string): Promise<number[]>;
      passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
    };

    const [first] = texts;
    if (inputType === "query" && texts.length === 1 && first !== undefined) {
      const vec = await model.queryEmbed(first);
      return [new Float32Array(vec)];
    }

    // passageEmbed returns an async generator of batches
    const results: Float32Array[] = [];
    for await (const batch of model.passageEmbed(texts)) {
      for (const vec of batch) {
        results.push(new Float32Array(vec));
      }
    }
    return results;
  }
}
|
|
203
|
+
|
|
135
204
|
// ---------------------------------------------------------------------------
|
|
136
205
|
// Provider resolution
|
|
137
206
|
// ---------------------------------------------------------------------------
|
|
138
207
|
|
|
139
208
|
/**
 * Per-provider fallback settings, keyed by provider name.
 *
 * Each entry gives the model identifier and the embedding vector size that
 * apply when the configuration does not override them.
 */
const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
  // On-device model (fastembed enum key).
  local: {
    model: "BGESmallENV15",
    dimensions: 384,
  },
  voyage: {
    model: "voyage-code-3",
    dimensions: 1024,
  },
  openai: {
    model: "text-embedding-3-small",
    dimensions: 1536,
  },
};
|
|
@@ -165,23 +235,36 @@ function getProvider(): EmbeddingProvider | null {
|
|
|
165
235
|
}
|
|
166
236
|
|
|
167
237
|
const providerName = cfg.provider;
|
|
168
|
-
const
|
|
169
|
-
if (!apiKey) {
|
|
170
|
-
cachedProvider = null;
|
|
171
|
-
return null;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const defaults = PROVIDER_DEFAULTS[providerName];
|
|
175
|
-
const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
|
|
176
|
-
const dimensions = cfg.dimensions;
|
|
238
|
+
const model = cfg.model;
|
|
177
239
|
|
|
178
240
|
switch (providerName) {
|
|
179
|
-
case "
|
|
180
|
-
|
|
241
|
+
case "local": {
|
|
242
|
+
try {
|
|
243
|
+
cachedProvider = new LocalProvider(model);
|
|
244
|
+
} catch {
|
|
245
|
+
log.info("local embedding provider unavailable (fastembed not installed)");
|
|
246
|
+
cachedProvider = null;
|
|
247
|
+
}
|
|
181
248
|
break;
|
|
182
|
-
|
|
183
|
-
|
|
249
|
+
}
|
|
250
|
+
case "voyage": {
|
|
251
|
+
const apiKey = getProviderApiKey(providerName);
|
|
252
|
+
if (!apiKey) {
|
|
253
|
+
cachedProvider = null;
|
|
254
|
+
return null;
|
|
255
|
+
}
|
|
256
|
+
cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
|
|
184
257
|
break;
|
|
258
|
+
}
|
|
259
|
+
case "openai": {
|
|
260
|
+
const apiKey = getProviderApiKey(providerName);
|
|
261
|
+
if (!apiKey) {
|
|
262
|
+
cachedProvider = null;
|
|
263
|
+
return null;
|
|
264
|
+
}
|
|
265
|
+
cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
|
|
266
|
+
break;
|
|
267
|
+
}
|
|
185
268
|
default:
|
|
186
269
|
log.info(`unknown embedding provider: ${providerName}`);
|
|
187
270
|
cachedProvider = null;
|
|
@@ -433,13 +516,74 @@ export function checkConfigChange(): boolean {
|
|
|
433
516
|
return true;
|
|
434
517
|
}
|
|
435
518
|
|
|
519
|
+
// ---------------------------------------------------------------------------
|
|
520
|
+
// Startup backfill — single entry point for all hosts
|
|
521
|
+
// ---------------------------------------------------------------------------
|
|
522
|
+
|
|
523
|
+
/**
 * Run all embedding backfills and log coverage stats.
 *
 * This is the canonical entry point that every host adapter (OpenCode, Pi,
 * future ACP) should call once during init. It:
 *   1. Returns immediately when no embedding provider is available
 *   2. Backfills knowledge entries missing embeddings via
 *      `backfillEmbeddings()` (which also handles provider/model/dimension
 *      changes by clearing stale embeddings first)
 *   3. Backfills distillations missing embeddings via
 *      `backfillDistillationEmbeddings()`
 *   4. Logs a one-line coverage summary through `log.info`
 *
 * Coverage is reported as "with embedding / total" over the same row
 * population on both sides: knowledge rows with `confidence > 0.2`, and
 * non-archived distillations with non-empty `observations`.
 *
 * Fire-and-forget: callers should `.catch()` — embedding failures must not
 * block plugin initialization.
 *
 * @returns Resolves once both backfills have run and the summary is logged.
 */
export async function runStartupBackfill(): Promise<void> {
  if (!isAvailable()) return;

  const knowledgeEmbedded = await backfillEmbeddings();
  const distillationEmbedded = await backfillDistillationEmbeddings();

  // Every stat below is a single-row `COUNT(*) as n` query.
  const count = (sql: string): number => (db().query(sql).get() as { n: number }).n;

  // Coverage stats — always computed so gaps in coverage are visible.
  const kTotal = count("SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2");
  const kWithEmb = count(
    "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2",
  );
  const dTotal = count(
    "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''",
  );
  // Same filter as dTotal so the numerator can never cover rows the
  // denominator excludes.
  const dWithEmb = count(
    "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0 AND observations != ''",
  );

  const parts: string[] = [];
  if (knowledgeEmbedded > 0 || distillationEmbedded > 0) {
    parts.push(`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`);
  }
  parts.push(
    `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`,
  );
  log.info(`embedding startup: ${parts.join("; ")}`);
}
|
|
579
|
+
|
|
436
580
|
// ---------------------------------------------------------------------------
|
|
437
581
|
// Backfill — knowledge
|
|
438
582
|
// ---------------------------------------------------------------------------
|
|
439
583
|
|
|
440
584
|
/**
|
|
441
585
|
* Embed all knowledge entries that are missing embeddings.
|
|
442
|
-
* Called
|
|
586
|
+
* Called by `runStartupBackfill()`.
|
|
443
587
|
* Also handles config changes: if provider/model/dimensions changed, clears
|
|
444
588
|
* stale embeddings first, then re-embeds all entries.
|
|
445
589
|
* Returns the number of entries embedded.
|